From d0213b4202ac12d7e92f7125e87c31d9ecb637dd Mon Sep 17 00:00:00 2001
From: Kyle Gunger
Date: Mon, 11 Apr 2022 18:50:08 -0400
Subject: Remove src folders

---
 tnslc/parse/parse.tnsl | 20 +++
 tnslc/parse/token.tnsl | 295 +++++++++++++++++++++++++++++++++++++++++++++
 tnslc/parse/tokenizer.tnsl | 53 ++++++++
 3 files changed, 368 insertions(+)
 create mode 100644 tnslc/parse/parse.tnsl
 create mode 100644 tnslc/parse/token.tnsl
 create mode 100644 tnslc/parse/tokenizer.tnsl

(limited to 'tnslc/parse')

diff --git a/tnslc/parse/parse.tnsl b/tnslc/parse/parse.tnsl
new file mode 100644
index 0000000..ec65f7b
--- /dev/null
+++ b/tnslc/parse/parse.tnsl
@@ -0,0 +1,20 @@
+/##
+    Copyright 2021 Kyle Gunger
+
+    This file is licensed under the CDDL 1.0 (the License)
+    and may only be used in accordance with the License.
+    You should have received a copy of the License with this
+    software/source code. If you did not, a copy can be found
+    at the following URL:
+
+    https://opensource.org/licenses/CDDL-1.0
+
+    THIS SOFTWARE/SOURCE CODE IS PROVIDED "AS IS" WITH NO
+    WARRANTY, GUARANTEE, OR CLAIM OF FITNESS FOR ANY PURPOSE
+    EXPRESS OR IMPLIED
+#/
+
+# Declares module parse and includes the token and tokenizer source files.
+/; module parse
+    :include "tnslc/parse/token.tnsl"
+    :include "tnslc/parse/tokenizer.tnsl"
+;/
\ No newline at end of file
diff --git a/tnslc/parse/token.tnsl b/tnslc/parse/token.tnsl
new file mode 100644
index 0000000..0134483
--- /dev/null
+++ b/tnslc/parse/token.tnsl
@@ -0,0 +1,295 @@
+/#
+    Copyright 2021 Kyle Gunger
+
+    This file is licensed under the CDDL 1.0 (the License)
+    and may only be used in accordance with the License.
+    You should have received a copy of the License with this
+    software/source code. 
If you did not, a copy can be found
+    at the following URL:
+
+    https://opensource.org/licenses/CDDL-1.0
+
+    THIS SOFTWARE/SOURCE CODE IS PROVIDED "AS IS" WITH NO
+    WARRANTY, GUARANTEE, OR CLAIM OF FITNESS FOR ANY PURPOSE
+    EXPRESS OR IMPLIED
+#/
+
+/# The various types of tokens. get_token_type returns one of these values. #/
+; enum TOKEN_TYPE [int] {
+    LINESEP = 0,
+    INLNSEP = 1,
+    DELIMIT = 2,
+    AUGMENT = 3,
+    LITERAL = 4,
+    KEYTYPE = 5,
+    PREWORD = 6,
+    KEYWORD = 7,
+    DEFWORD = 8
+}
+
+/#
+    Token struct definition.
+    token_type holds the token's classification, data points at its characters.
+    line and col are presumably the source position; tokenize currently
+    always stores 0 in both (TODO confirm intended use).
+#/
+; struct Token {
+    int
+        token_type,
+        line,
+        col,
+
+    ~{}charp
+        data
+}
+
+/; method Token
+
+    /# Deletes the character array that data points to. #/
+    /; operator delete
+        ;delete self.data
+    ;/
+
+    /# Appends every character of part, in order, to this token's data. #/
+    /; add_char (~{}charp part)
+        # ;uint l = len self.data`
+        # ;realloc self.data, l + len part
+        /;loop (int i = 0; i < len part`) [i++]
+            # ;self.data`{l + i} = part{i}
+            ;self.data`.append(part`{i})
+        ;/
+    ;/
+;/
+
+/#
+    Reserved words and characters, as well as
+    helper funcs for checking their token types.
+#/
+
+;{}{}charp PREWORDS = {
+    "include",
+    "define",
+    "extern",
+    "size",
+    "align",
+    "address",
+    "rootfile",
+    "if",
+    "else",
+    "abi"
+}
+
+;const {}{}charp KEYTYPES = {
+    "bool",
+    "char",
+    "charp",
+
+    "int8",
+    "int16",
+    "int32",
+    "int64",
+    "int",
+    "uint8",
+    "uint16",
+    "uint32",
+    "uint64",
+    "uint",
+
+    "float32",
+    "float64",
+    "float",
+
+    "void",
+    "type"
+}
+
+;{}{}charp KEYWORDS = {
+    "struct",
+    "interface",
+    "enum",
+    "is",
+    "extends",
+
+    "loop",
+    "continue",
+    "break",
+
+    "match",
+    "case",
+    "default",
+
+    "label",
+    "goto",
+
+    "if",
+    "else",
+
+    "const",
+    "static",
+    "volatile",
+
+    "method",
+    "override",
+    "self",
+    "super",
+    "operator",
+
+    "raw",
+    "asm",
+    "inline",
+
+    "delete",
+
+    "module",
+    "export"
+}
+
+;{}{}charp LITERALS = {
+    "true",
+    "false"
+}
+
+;{}charp RESERVED = "`~!#%^&*()-=+[]{}|;:,.<>/"
+
+;{}charp DELIMITS = "()[]{}"
+;{}charp LINESEPS = ";:#"
+;{}charp INLNSEPS = ","
+# NOTE(review): the next definition looks truncated in this copy of the patch:
+# the AUGMENTS string literal runs straight into the tail of a brace-delimited
+# string list, and MDELIMITS / MAUGMENTS (both used by get_token_type) are not
+# defined anywhere in view. Left verbatim; restore the missing text from the
+# repository before relying on this section.
+;{}charp AUGMENTS = "~`.&|^>>",
+
+    # PREaugmented augmentors
+    "&=",
+    "|=",
+    "^=",
+    "+=",
+    "-=",
+    "*=",
+    "/=",
+    "%=",
+    "~=",
+    "`=",
+
+    # POSTaugmented augmentors
+    "!&",
+    "!|",
+    "!^",
+    "!==",
+    "!&&",
+    "!||",
+    "!>",
+    "!<",
+    ">==",
+    "<==",
+
+    # Increment and De-increment
+    "++",
+    "--"
+}
+
+# Presumably the length of the longest multi-character reserved token
+# (e.g. "!=="); nothing in view reads it yet. TODO confirm.
+;int MAX_MRESERVED = 3
+
+/##
+    Checks if the character point p is in the string cmp
+
+    Returns true on the first element of cmp equal to p, false if
+    there is none (including when cmp is empty).
+
+#; is_in_string (~{}charp cmp, charp p) [bool]
+
+    /; loop (int i = 0; i < len cmp`) [i++]
+        # Compare against the parameter p.
+        /; if (p == cmp`{i})
+            ;return true
+        ;/
+    ;/
+
+    ;return false
+;/
+
+/##
+    Checks if the string s is in the list cmp
+
+    An entry matches only when it has the same length as s and every
+    character is equal. Returns false when no entry matches.
+
+#; is_in_string_list (~{}{}charp cmp, ~{}charp s) [bool]
+
+    /; loop (int i = 0; i < len cmp`) [i++]
+
+        # s is a pointer, so it is dereferenced before taking its length.
+        /; if (len s` == len cmp`{i})
+
+            # Track equality explicitly so that a mismatch can never
+            # fall through to the return below.
+            ;bool same = true
+
+            /; loop (int j = 0; same && j < len s`) [j++]
+
+                /; if (s`{j} !== cmp`{i}{j})
+                    ;same = false
+                ;/
+            ;/
+
+            /; if (same)
+                ;return true
+            ;/
+        ;/
+
+    ;/
+
+    ;return false
+;/
+
+/#
+    Get the token_type value for a given string of character points
+
+    Strings longer than one character are looked up in PREWORDS, KEYTYPES,
+    KEYWORDS, LITERALS, MDELIMITS and MAUGMENTS, in that order; the first
+    list containing s decides the type. Single characters are looked up in
+    DELIMITS, LINESEPS, INLNSEPS and AUGMENTS. Anything unmatched is a
+    DEFWORD. An empty string returns -1.
+
+    NOTE(review): "if" and "else" are listed in both PREWORDS and KEYWORDS;
+    because PREWORDS is checked first they are always reported as PREWORD.
+
+#; get_token_type (~{}charp s) [int]
+
+    # s is a pointer, so it is dereferenced before taking its length,
+    # matching the single-character branch below.
+    /; if (len s` > 1)
+
+        /; if (is_in_string_list(~PREWORDS, s))
+            ;return TOKEN_TYPE.PREWORD
+        ;; else if (is_in_string_list(~KEYTYPES, s))
+            ;return TOKEN_TYPE.KEYTYPE
+        ;; else if (is_in_string_list(~KEYWORDS, s))
+            ;return TOKEN_TYPE.KEYWORD
+        ;; else if (is_in_string_list(~LITERALS, s))
+            ;return TOKEN_TYPE.LITERAL
+        ;; else if (is_in_string_list(~MDELIMITS, s))
+            ;return TOKEN_TYPE.DELIMIT
+        ;; else if (is_in_string_list(~MAUGMENTS, s))
+            ;return TOKEN_TYPE.AUGMENT
+        ;/
+
+        ;return TOKEN_TYPE.DEFWORD
+
+    ;;else if (len s` == 1)
+
+        /; if (is_in_string(~DELIMITS, s`{0}))
+            ;return TOKEN_TYPE.DELIMIT
+        ;; else if (is_in_string(~LINESEPS, s`{0}))
+            ;return TOKEN_TYPE.LINESEP
+        ;; else if (is_in_string(~INLNSEPS, s`{0}))
+            ;return TOKEN_TYPE.INLNSEP
+        ;; else if (is_in_string(~AUGMENTS, s`{0}))
+            ;return TOKEN_TYPE.AUGMENT
+        ;/
+
+        ;return TOKEN_TYPE.DEFWORD
+    ;/
+
+    # What, we just produce vacant tokens now?
+    # Something has gone wrong.
+
+    ;return -1
+;/
\ No newline at end of file
diff --git a/tnslc/parse/tokenizer.tnsl b/tnslc/parse/tokenizer.tnsl
new file mode 100644
index 0000000..54671fe
--- /dev/null
+++ b/tnslc/parse/tokenizer.tnsl
@@ -0,0 +1,53 @@
+/#
+    Copyright 2021 Kyle Gunger
+
+    This file is licensed under the CDDL 1.0 (the License)
+    and may only be used in accordance with the License.
+    You should have received a copy of the License with this
+    software/source code. If you did not, a copy can be found
+    at the following URL:
+
+    https://opensource.org/licenses/CDDL-1.0
+
+    THIS SOFTWARE/SOURCE CODE IS PROVIDED "AS IS" WITH NO
+    WARRANTY, GUARANTEE, OR CLAIM OF FITNESS FOR ANY PURPOSE
+    EXPRESS OR IMPLIED
+#/
+
+/# Returns true when c is a tab, a newline or a space. #/
+/; is_space (charp c) [bool]
+    ;return c == '\t' || c == '\n' || c == ' '
+;/
+
+/#
+    Reports whether c ends the token accumulated so far in dat.
+    An empty dat is never broken; otherwise the token ends on whitespace.
+#/
+/; break_token ({}charp dat, charp c) [bool]
+    /; if (len dat == 0)
+        ;return false
+    ;/
+
+    ;return is_space(c)
+;/
+
+/#
+    Stub: the body is empty, so nothing is returned yet despite the
+    declared return type.
+#/
+/; parse_reserved ({}charp dat) [{}Token]
+
+;/
+
+/#
+    Reads fstr one character at a time until read() yields -1 and splits
+    the input into whitespace-separated tokens. Every token is stored with
+    token_type, line and col set to 0. Returns a pointer to the token list.
+
+    NOTE(review): ~tmp and ~out take the address of variables declared
+    inside this function. Whether those stay valid after their block ends
+    cannot be told from here; confirm against the language's array and
+    pointer lifetime rules.
+#/
+/; tokenize (tnsl.io.File fstr) [~{}Token]
+    ;{}Token out = {}
+    ;{}charp tdat = {}
+
+    /; loop (int i = fstr.read(); i !== -1) [i = fstr.read()]
+        /; if (break_token(tdat, i))
+            /; if (len tdat > 0)
+                ;{}charp tmp = tdat
+                ;Token ttk = {0, 0, 0, ~tmp}
+                ;out.append(ttk)
+                ;tdat = {}
+            ;/
+        ;; else if ( !is_space(i) )
+            ;tdat.append(i)
+        ;/
+    ;/
+
+    # Prints whatever was still being accumulated at end of input
+    # (looks like leftover debug output; kept as is).
+    ;tnsl.io.println(tdat)
+
+    # Input that does not end in whitespace leaves its last token in tdat;
+    # emit it so the final token is not lost.
+    /; if (len tdat > 0)
+        ;{}charp tmp = tdat
+        ;Token ttk = {0, 0, 0, ~tmp}
+        ;out.append(ttk)
+    ;/
+
+    ;return ~out
+;/
--
cgit v1.2.3