diff options
Diffstat (limited to 'tnslc/parse')
-rw-r--r-- | tnslc/parse/parse.tnsl | 87 | ||||
-rw-r--r-- | tnslc/parse/token.tnsl | 331 | ||||
-rw-r--r-- | tnslc/parse/tokenizer.tnsl | 114 |
3 files changed, 0 insertions, 532 deletions
diff --git a/tnslc/parse/parse.tnsl b/tnslc/parse/parse.tnsl deleted file mode 100644 index 8919c3a..0000000 --- a/tnslc/parse/parse.tnsl +++ /dev/null @@ -1,87 +0,0 @@ -/## - Copyright 2021-2022 Kyle Gunger - - This file is licensed under the CDDL 1.0 (the License) - and may only be used in accordance with the License. - You should have received a copy of the License with this - software/source code. If you did not, a copy can be found - at the following URL: - - https://opensource.org/licenses/CDDL-1.0 - - THIS SOFTWARE/SOURCE CODE IS PROVIDED "AS IS" WITH NO - WARRANTY, GUARANTEE, OR CLAIM OF FITNESS FOR ANY PURPOSE - EXPRESS OR IMPLIED -#/ - -/; export module parse - :include "parse/token.tnsl" - :include "parse/tokenizer.tnsl" -;/ - -/; create_panic ({}uint8 err) - ;tnsl.io.println("ABOUT TO INDUCE PANIC... STAND BY") - ;tnsl.io.print("Error code given: ") - ;tnsl.io.println(err) - ;{}int i = {0} - ;i{2} -;/ - -/# The various types of tokens #/ -; enum TOKEN_TYPE [int] { - LINESEP = 0, - INLNSEP = 1, - DELIMIT = 2, - AUGMENT = 3, - LITERAL = 4, - KEYTYPE = 5, - PREWORD = 6, - KEYWORD = 7, - DEFWORD = 8 -} - -/# Token struct definition #/ -; struct Token { - int - token_type, - line, - col, - - ~{}uint8 - data -} - -/; method Token - - /; print - ;tnsl.io.print("{ ") - ;tnsl.io.print(self.token_type) - ;tnsl.io.print(" ") - ;tnsl.io.print(self.data`) - ;tnsl.io.print(" ") - ;tnsl.io.print(self.line) - ;tnsl.io.print(" ") - ;tnsl.io.print(self.col) - ;tnsl.io.print(" } ") - ;/ - - /; operator delete - ;delete self.data - ;/ - - /; add_char (~{}uint8 part) - # ;uint l = len self.data` - # ;realloc self.data, l + len part - /; loop (int i = 0; i < len part`) [i++] - # ;self.data`{l + i} = part{i} - ;self.data`.append(part`{i}) - ;/ - ;/ -;/ - -/; print_tokens(~{}Token dat) - /; loop (int i = 0; i < len dat`) [i++] - ;dat`{i}.print() - ;/ - ;tnsl.io.print("\n") -;/ diff --git a/tnslc/parse/token.tnsl b/tnslc/parse/token.tnsl deleted file mode 100644 index fecc7f7..0000000 --- a/tnslc/parse/token.tnsl +++ /dev/null @@ -1,331 +0,0 @@ -/# - Copyright 2021-2022 Kyle Gunger - - This file is licensed under the CDDL 1.0 (the License) - and may only be used in accordance with the License. - You should have received a copy of the License with this - software/source code. If you did not, a copy can be found - at the following URL: - - https://opensource.org/licenses/CDDL-1.0 - - THIS SOFTWARE/SOURCE CODE IS PROVIDED "AS IS" WITH NO - WARRANTY, GUARANTEE, OR CLAIM OF FITNESS FOR ANY PURPOSE - EXPRESS OR IMPLIED -#/ - -/# - Reserved words and characters, as well as - helper funcs for checking their token types. -#/ - -;{}{}uint8 PREWORDS = { - "include", - "define", - "extern", - "size", - "align", - "address", - "rootfile", - "if", - "else", - "abi" -} - -;{}{}uint8 KEYTYPES = { - "int8", - "int16", - "int32", - "int64", - "int", - - "uint8", - "uint16", - "uint32", - "uint64", - "uint", - - "float32", - "float64", - "float", - - "bool", - "void", - "type" -} - -;{}{}uint8 KEYWORDS = { - "struct", - "interface", - "enum", - "extends", - - "loop", - "continue", - "break", - - "match", - "case", - "default", - - "label", - "goto", - - "if", - "else", - - "const", - "static", - "volatile", - - "method", - "override", - "operator", - - "raw", - "asm", - "inline", - "virtual", - - "delete", - "alloc", - "salloc", - "realloc", - - "module", - "export" -} - -;{}{}uint8 LITERALS = { - "true", - "false", - - "null", - - "self", - "super" -} - -;{}uint8 RESERVED = "`~!#%^&*()-=+[]{}|;:,.<>/" - -;{}uint8 DELIMITS = "()[]{}" -;{}uint8 LINESEPS = ";:#" -;{}uint8 INLNSEPS = "," -;{}uint8 AUGMENTS = "~`.&|^><!+-*/%" - -;{}{}uint8 MDELIMITS = { - # Code block - "/;", - ";/", - - # Comment block - "/#", - "#/", - - # Preproc block - "/:", - ":/", - - # Redef blocks - ";;", - "::", - ";#", - ":#", - "#;", - "#:" -} - -;{}{}uint8 MAUGMENTS = { - # Boolean - "==", - "&&", - "||", - - # Bitwise shifts - "<<", - ">>", - - # PREaugmented augmentors - "&=", - "|=", - "^=", - "+=", - "-=", - "*=", - "/=", - "%=", - "~=", - "`=", - - # POSTaugmented augmentors - "!&", - "!|", - "!^", - "!==", - "!&&", - "!||", - "!>", - "!<", - ">==", - "<==", - - # Increment and De-increment - "++", - "--", - - "is", - "len", - "size" -} - -;int MAX_MRESERVED = 3 - -/## - Checks if the character point p is in the string cmp - -#; is_in_string (~{}uint8 cmp, uint8 p) [bool] - - /; loop (int i = 0; i < len cmp`) [i++] - /; if (p == cmp`{i}) - ;return true - ;/ - ;/ - - ;return false -;/ - -/## - Checks if the string s is in the list cmp - -#; is_in_string_list (~{}{}uint8 cmp, ~{}uint8 s) [bool] - - /; loop (int i = 0; i < len cmp`) [i++] - - /; if (len s` == len cmp`{i}) - - /; loop (int j = 0; j < len s`) [j++] - - /; if (s`{j} !== cmp`{i}{j}) - ;break 1 - ;/ - ;/ - - ;return true - ;/ - - ;/ - - ;return false -;/ - -/; is_numeric_literal(~{}uint8 dat) [bool] - /; if (len dat` == 0) - ;return false - ;/ - - ;bool dec = true, flt = false - - ;int i = 0 - - /; if (len dat` > 1) - /; if (dat`{0} == '0' && !is_digit(dat`{1}) && dat`{1} !== '.') - ;dec = false - ;i = 2 - ;/ - ;/ - - /; loop (i < len dat`) [i++] - /; if (!is_digit(dat`{i}) && dec) - /; if (dat`{i} == '.') - /; if (flt) - ;return false - ;/ - ;flt = true - ;; else if (dec) - ;return false - ;/ - ;/ - ;/ - - ;return true -;/ - -/; is_text_literal(~{}uint8 dat) [bool] - /; if (len dat` < 1) - ;return false - ;/ - ;return dat`{0} == '"' || dat`{0} == '\'' -;/ - -/; string_closed ({}uint8 dat, uint8 c) [bool] - /; if (len dat < 2) - ;return false - ;/ - - ;uint8 closing = dat{0} - ;bool escaping = false - - /; loop (int i = 1; i < len dat) [i++] - /; if (dat{i} == closing && !escaping) - ;return true - ;; else if (dat{i} == '\\' && !escaping) - ;escaping = true - ;; else - ;escaping = false - ;/ - ;/ - - ;return false -;/ - -/# - Get the token_type value for a given string of character points - -#; get_token_type (~{}uint8 s) [int] - - /; if (len s` > 1) - - /; if (is_numeric_literal(s) || s`{0} == '"' || s`{0} == '\'') - ;return TOKEN_TYPE.LITERAL - ;/ - - /; if (is_in_string_list(~PREWORDS, s)) - ;return TOKEN_TYPE.PREWORD - ;; else if (is_in_string_list(~KEYTYPES, s)) - ;return TOKEN_TYPE.KEYTYPE - ;; else if (is_in_string_list(~KEYWORDS, s)) - ;return TOKEN_TYPE.KEYWORD - ;; else if (is_in_string_list(~LITERALS, s)) - ;return TOKEN_TYPE.LITERAL - ;; else if (is_in_string_list(~MDELIMITS, s)) - ;return TOKEN_TYPE.DELIMIT - ;; else if (is_in_string_list(~MAUGMENTS, s)) - ;return TOKEN_TYPE.AUGMENT - ;/ - - ;return TOKEN_TYPE.DEFWORD - - ;;else if (len s` == 1) - - /; if (is_digit(s`{0})) - ;return TOKEN_TYPE.LITERAL - ;/ - - /; if (is_in_string(~DELIMITS, s`{0})) - ;return TOKEN_TYPE.DELIMIT - ;; else if (is_in_string(~LINESEPS, s`{0})) - ;return TOKEN_TYPE.LINESEP - ;; else if (is_in_string(~INLNSEPS, s`{0})) - ;return TOKEN_TYPE.INLNSEP - ;; else if (is_in_string(~AUGMENTS, s`{0})) - ;return TOKEN_TYPE.AUGMENT - ;/ - - ;return TOKEN_TYPE.DEFWORD - ;/ - - # What, we just produce vacant tokens now? - # Something has gone wrong. - - ;return -1 -;/
\ No newline at end of file diff --git a/tnslc/parse/tokenizer.tnsl b/tnslc/parse/tokenizer.tnsl deleted file mode 100644 index 30ccd4b..0000000 --- a/tnslc/parse/tokenizer.tnsl +++ /dev/null @@ -1,114 +0,0 @@ -/# - Copyright 2021-2022 Kyle Gunger - - This file is licensed under the CDDL 1.0 (the License) - and may only be used in accordance with the License. - You should have received a copy of the License with this - software/source code. If you did not, a copy can be found - at the following URL: - - https://opensource.org/licenses/CDDL-1.0 - - THIS SOFTWARE/SOURCE CODE IS PROVIDED "AS IS" WITH NO - WARRANTY, GUARANTEE, OR CLAIM OF FITNESS FOR ANY PURPOSE - EXPRESS OR IMPLIED -#/ - -/; is_float (~{}uint8 dat) [bool] - ;return is_numeric_literal(dat) && is_in_string(dat, '.') -;/ - -/; break_token ({}uint8 dat, uint8 c) [bool] - /; if (len dat == 0) - ;return false - - ;; else if (dat{0} == '"' || dat{0} == '\'') - ;return string_closed(dat, c) - - ;; else if (is_in_string(~RESERVED, dat{len dat - 1})) - - /; if (is_in_string(~RESERVED, c)) - ;dat.append(c) - ;return get_token_type(~dat) == TOKEN_TYPE.DEFWORD - - ;; else if (len dat == 1 && dat{0} == '.') - ;return !is_digit(c) - - ;/ - - ;return true - - ;; else if (is_in_string(~RESERVED, c)) - - /; if (is_numeric_literal(~dat) && !is_float(~dat) && c == '.') - ;return false - - ;/ - - ;return true - ;/ - - ;return is_whitespace(c) -;/ - -/; strip_and_expand (~{}Token dat) [{}Token] - ;{}Token out = {} - - ;bool cblk = false - - /; loop (int i = 0; i < len dat`) [i++] - /; if (!cblk) - /; if (string_equate(dat`{i}.data`, "/#")) - ;cblk = true - ;; else - ;out.append(dat`{i}) - ;/ - - ;; else if (string_equate(dat`{i}.data`, "#/")) - ;cblk = false - ;/ - ;/ - - ;return out -;/ - -/; tokenize (tnsl.io.File fstr) [~{}Token] - ;{}Token out = {} - ;{}uint8 tdat = {} - ;bool comment = false - ;int line = 1, col = 1 - - /; loop (int i = fstr.read(); i !== -1) [i = fstr.read()] - /; if (break_token(tdat, i) && !comment) - /; if (len tdat == 1 && tdat{0} == '#') - ;tdat = {} - ;comment = true - ;; else if (len tdat > 0) - ;{}uint8 tmp = tdat - ;Token ttk = {get_token_type(~tmp), line, col, ~tmp} - ;out.append(ttk) - ;tdat = {} - ;/ - ;/ - - /; if ( (!is_whitespace(i) || is_text_literal(~tdat)) && !comment ) - ;tdat.append(i) - ;; else if (i == '\n') - ;line++ - ;col = 0 - /; if (comment) - ;comment = false - ;/ - ;/ - - ;col++ - ;/ - - /; if (len tdat > 0) - ;Token ttk = {get_token_type(~tdat), line, col, ~tdat} - ;out.append(ttk) - ;/ - - ;out = strip_and_expand(~out) - ;return ~out -;/ |