summaryrefslogtreecommitdiff
path: root/tnslc/parse
diff options
context:
space:
mode:
author Kyle Gunger <kgunger12@gmail.com> 2022-04-11 18:50:08 -0400
committer Kyle Gunger <kgunger12@gmail.com> 2022-04-11 18:50:08 -0400
commit d0213b4202ac12d7e92f7125e87c31d9ecb637dd (patch)
tree 50164dfa2295c6806f3866c24a897789856337a7 /tnslc/parse
parent 41cb9e107b20153449526cedec101e33f21d8630 (diff)
Remove src folders
Diffstat (limited to 'tnslc/parse')
-rw-r--r-- tnslc/parse/parse.tnsl 20
-rw-r--r-- tnslc/parse/token.tnsl 295
-rw-r--r-- tnslc/parse/tokenizer.tnsl 53
3 files changed, 368 insertions, 0 deletions
diff --git a/tnslc/parse/parse.tnsl b/tnslc/parse/parse.tnsl
new file mode 100644
index 0000000..ec65f7b
--- /dev/null
+++ b/tnslc/parse/parse.tnsl
@@ -0,0 +1,20 @@
+/##
+ Copyright 2021 Kyle Gunger
+
+ This file is licensed under the CDDL 1.0 (the License)
+ and may only be used in accordance with the License.
+ You should have received a copy of the License with this
+ software/source code. If you did not, a copy can be found
+ at the following URL:
+
+ https://opensource.org/licenses/CDDL-1.0
+
+ THIS SOFTWARE/SOURCE CODE IS PROVIDED "AS IS" WITH NO
+ WARRANTY, GUARANTEE, OR CLAIM OF FITNESS FOR ANY PURPOSE
+ EXPRESS OR IMPLIED
+#/
+
+/# The parse module: pulls in the token definitions and the tokenizer. #/
+/; module parse
+ :include "tnslc/parse/token.tnsl"
+ :include "tnslc/parse/tokenizer.tnsl"
+;/ \ No newline at end of file
diff --git a/tnslc/parse/token.tnsl b/tnslc/parse/token.tnsl
new file mode 100644
index 0000000..0134483
--- /dev/null
+++ b/tnslc/parse/token.tnsl
@@ -0,0 +1,295 @@
+/#
+ Copyright 2021 Kyle Gunger
+
+ This file is licensed under the CDDL 1.0 (the License)
+ and may only be used in accordance with the License.
+ You should have received a copy of the License with this
+ software/source code. If you did not, a copy can be found
+ at the following URL:
+
+ https://opensource.org/licenses/CDDL-1.0
+
+ THIS SOFTWARE/SOURCE CODE IS PROVIDED "AS IS" WITH NO
+ WARRANTY, GUARANTEE, OR CLAIM OF FITNESS FOR ANY PURPOSE
+ EXPRESS OR IMPLIED
+#/
+
+/#
+ The various types of tokens.
+
+ How get_token_type assigns each value
+  LINESEP - a single character found in LINESEPS
+  INLNSEP - a single character found in INLNSEPS
+  DELIMIT - a single character found in DELIMITS, or an entry of MDELIMITS
+  AUGMENT - a single character found in AUGMENTS, or an entry of MAUGMENTS
+  LITERAL - an entry of LITERALS
+  KEYTYPE - an entry of KEYTYPES
+  PREWORD - an entry of PREWORDS
+  KEYWORD - an entry of KEYWORDS
+  DEFWORD - the fallback for any other non-empty string
+#/
+; enum TOKEN_TYPE [int] {
+ LINESEP = 0,
+ INLNSEP = 1,
+ DELIMIT = 2,
+ AUGMENT = 3,
+ LITERAL = 4,
+ KEYTYPE = 5,
+ PREWORD = 6,
+ KEYWORD = 7,
+ DEFWORD = 8
+}
+
+/#
+ Token struct definition
+
+ token_type - int, presumably one of the TOKEN_TYPE values (TODO confirm,
+              tokenize currently always stores 0)
+ line, col  - ints, presumably the source position of the token (TODO
+              confirm, tokenize currently always stores 0 for both)
+ data       - pointer to the character data of the token, deleted by the
+              delete operator declared in the Token method block
+#/
+; struct Token {
+ int
+ token_type,
+ line,
+ col,
+
+ ~{}charp
+ data
+}
+
+/# Methods attached to the Token struct #/
+/; method Token
+
+ # Deleting a Token also deletes the character data it points to.
+ /; operator delete
+ ;delete self.data
+ ;/
+
+ # Appends every character of the string that part points to onto the
+ # data of this token, one character per loop iteration.
+ # The commented-out statements below are an earlier manual realloc and
+ # indexed-copy approach that is no longer executed.
+ /; add_char (~{}charp part)
+ # ;uint l = len self.data`
+ # ;realloc self.data, l + len part
+ /;loop (int i = 0; i < len part`) [i++]
+ # ;self.data`{l + i} = part{i}
+ ;self.data`.append(part`{i})
+ ;/
+ ;/
+;/
+
+/#
+ Reserved words and characters, as well as
+ helper funcs for checking their token types.
+#/
+
+/#
+ Words that get_token_type reports as TOKEN_TYPE.PREWORD.
+ This table is checked before every other table.
+ NOTE(review) - "if" and "else" are also listed in KEYWORDS. Because this
+ table is checked first, get_token_type reports them as PREWORD and never
+ as KEYWORD. Confirm that this is intended.
+#/
+;{}{}charp PREWORDS = {
+ "include",
+ "define",
+ "extern",
+ "size",
+ "align",
+ "address",
+ "rootfile",
+ "if",
+ "else",
+ "abi"
+}
+
+/#
+ Type names that get_token_type reports as TOKEN_TYPE.KEYTYPE.
+ NOTE(review) - this is the only lookup table in the file declared const.
+#/
+;const {}{}charp KEYTYPES = {
+ "bool",
+ "char",
+ "charp",
+
+ "int8",
+ "int16",
+ "int32",
+ "int64",
+ "int",
+ "uint8",
+ "uint16",
+ "uint32",
+ "uint64",
+ "uint",
+
+ "float32",
+ "float64",
+ "float",
+
+ "void",
+ "type"
+}
+
+/#
+ Words that get_token_type reports as TOKEN_TYPE.KEYWORD.
+ Checked after PREWORDS and KEYTYPES.
+ NOTE(review) - the "if" and "else" entries can never be reported as
+ KEYWORD by get_token_type, because PREWORDS lists them too and is
+ checked first.
+#/
+;{}{}charp KEYWORDS = {
+ "struct",
+ "interface",
+ "enum",
+ "is",
+ "extends",
+
+ "loop",
+ "continue",
+ "break",
+
+ "match",
+ "case",
+ "default",
+
+ "label",
+ "goto",
+
+ "if",
+ "else",
+
+ "const",
+ "static",
+ "volatile",
+
+ "method",
+ "override",
+ "self",
+ "super",
+ "operator",
+
+ "raw",
+ "asm",
+ "inline",
+
+ "delete",
+
+ "module",
+ "export"
+}
+
+/# Word literals that get_token_type reports as TOKEN_TYPE.LITERAL. #/
+;{}{}charp LITERALS = {
+ "true",
+ "false"
+}
+
+/#
+ Single character reserved sets.
+
+ RESERVED lists the reserved characters. The four strings after it split
+ those characters by the token type that get_token_type reports for a
+ one character token - DELIMIT, LINESEP, INLNSEP and AUGMENT.
+
+ The equals sign is part of AUGMENTS so that a lone assignment sign is
+ reported as AUGMENT. It is listed in RESERVED, and without an entry in
+ one of the four sets get_token_type would fall through to DEFWORD.
+#/
+;{}charp RESERVED = "`~!#%^&*()-=+[]{}|;:,.<>/"
+
+;{}charp DELIMITS = "()[]{}"
+;{}charp LINESEPS = ";:#"
+;{}charp INLNSEPS = ","
+;{}charp AUGMENTS = "=~`.&|^><!+-*/%"
+
+/#
+ Multi-character delimiters.
+ get_token_type reports these as TOKEN_TYPE.DELIMIT for strings longer
+ than one character.
+#/
+;{}{}charp MDELIMITS = {
+ # Code block
+ "/;",
+ ";/",
+
+ # Comment block
+ "/#",
+ "#/",
+
+ # Preproc block
+ "/:",
+ ":/",
+
+ # Redef blocks
+ ";;",
+ "::",
+ ";#",
+ ":#",
+ "#;",
+ "#:"
+}
+
+/#
+ Multi-character augments (operators).
+ get_token_type reports these as TOKEN_TYPE.AUGMENT for strings longer
+ than one character. This table is checked last, after MDELIMITS.
+#/
+;{}{}charp MAUGMENTS = {
+ # Boolean
+ "==",
+ "&&",
+ "||",
+
+ # Bitwise shifts
+ "<<",
+ ">>",
+
+ # PREaugmented augmentors
+ "&=",
+ "|=",
+ "^=",
+ "+=",
+ "-=",
+ "*=",
+ "/=",
+ "%=",
+ "~=",
+ "`=",
+
+ # POSTaugmented augmentors
+ "!&",
+ "!|",
+ "!^",
+ "!==",
+ "!&&",
+ "!||",
+ "!>",
+ "!<",
+ ">==",
+ "<==",
+
+ # Increment and De-increment
+ "++",
+ "--"
+}
+
+/#
+ Presumably the maximum length of a multi-character reserved token (TODO
+ confirm). The value 3 equals the length of the longest MAUGMENTS entries;
+ every MDELIMITS entry is 2 characters long.
+ Not referenced by any code in token.tnsl or tokenizer.tnsl as shown.
+#/
+;int MAX_MRESERVED = 3
+
+/##
+ Checks if the character point p is in the string cmp
+
+ cmp - pointer to the string to search
+ p   - the character point to look for
+
+ Returns true if any element of the string equals p, and false otherwise
+ (including when the string is empty).
+
+#; is_in_string (~{}charp cmp, charp p) [bool]
+
+    /; loop (int i = 0; i < len cmp`) [i++]
+        # Compare against the parameter p. No other name is in scope here.
+        /; if (p == cmp`{i})
+            ;return true
+        ;/
+    ;/
+
+    ;return false
+;/
+
+/##
+ Checks if the string s is in the list cmp
+
+ cmp - pointer to the list of strings to search
+ s   - pointer to the string to look for
+
+ Returns true only if some entry of the list has the same length as the
+ string and matches it character for character. Returns false otherwise
+ (including when the list is empty).
+
+#; is_in_string_list (~{}{}charp cmp, ~{}charp s) [bool]
+
+    /; loop (int i = 0; i < len cmp`) [i++]
+
+        # s is a pointer, so it is dereferenced before taking its length.
+        # Only entries of equal length can match.
+        /; if (len s` == len cmp`{i})
+
+            # Stays true until the first differing character is found.
+            ;bool same = true
+
+            /; loop (int j = 0; same && j < len s`) [j++]
+                /; if (s`{j} !== cmp`{i}{j})
+                    ;same = false
+                ;/
+            ;/
+
+            # Report a hit only when every character matched. A mismatch
+            # moves on to the next entry of the list.
+            /; if (same)
+                ;return true
+            ;/
+        ;/
+
+    ;/
+
+    ;return false
+;/
+
+/##
+ Get the token_type value for a given string of character points
+
+ s - pointer to the string to classify
+
+ Strings longer than one character are looked up, in order, in PREWORDS,
+ KEYTYPES, KEYWORDS, LITERALS, MDELIMITS and MAUGMENTS. The first table
+ that contains the string decides the type.
+ Strings of exactly one character are looked up, in order, in DELIMITS,
+ LINESEPS, INLNSEPS and AUGMENTS.
+ Anything not found is reported as TOKEN_TYPE.DEFWORD.
+
+ Returns a TOKEN_TYPE value, or -1 when the string is empty.
+
+#; get_token_type (~{}charp s) [int]
+
+    # s is a pointer, so it is dereferenced before taking its length,
+    # the same way the single character branch below does.
+    /; if (len s` > 1)
+
+        /; if (is_in_string_list(~PREWORDS, s))
+            ;return TOKEN_TYPE.PREWORD
+        ;; else if (is_in_string_list(~KEYTYPES, s))
+            ;return TOKEN_TYPE.KEYTYPE
+        ;; else if (is_in_string_list(~KEYWORDS, s))
+            ;return TOKEN_TYPE.KEYWORD
+        ;; else if (is_in_string_list(~LITERALS, s))
+            ;return TOKEN_TYPE.LITERAL
+        ;; else if (is_in_string_list(~MDELIMITS, s))
+            ;return TOKEN_TYPE.DELIMIT
+        ;; else if (is_in_string_list(~MAUGMENTS, s))
+            ;return TOKEN_TYPE.AUGMENT
+        ;/
+
+        ;return TOKEN_TYPE.DEFWORD
+
+    ;;else if (len s` == 1)
+
+        /; if (is_in_string(~DELIMITS, s`{0}))
+            ;return TOKEN_TYPE.DELIMIT
+        ;; else if (is_in_string(~LINESEPS, s`{0}))
+            ;return TOKEN_TYPE.LINESEP
+        ;; else if (is_in_string(~INLNSEPS, s`{0}))
+            ;return TOKEN_TYPE.INLNSEP
+        ;; else if (is_in_string(~AUGMENTS, s`{0}))
+            ;return TOKEN_TYPE.AUGMENT
+        ;/
+
+        ;return TOKEN_TYPE.DEFWORD
+    ;/
+
+    # What, we just produce vacant tokens now?
+    # Something has gone wrong.
+
+    ;return -1
+;/ \ No newline at end of file
diff --git a/tnslc/parse/tokenizer.tnsl b/tnslc/parse/tokenizer.tnsl
new file mode 100644
index 0000000..54671fe
--- /dev/null
+++ b/tnslc/parse/tokenizer.tnsl
@@ -0,0 +1,53 @@
+/#
+ Copyright 2021 Kyle Gunger
+
+ This file is licensed under the CDDL 1.0 (the License)
+ and may only be used in accordance with the License.
+ You should have received a copy of the License with this
+ software/source code. If you did not, a copy can be found
+ at the following URL:
+
+ https://opensource.org/licenses/CDDL-1.0
+
+ THIS SOFTWARE/SOURCE CODE IS PROVIDED "AS IS" WITH NO
+ WARRANTY, GUARANTEE, OR CLAIM OF FITNESS FOR ANY PURPOSE
+ EXPRESS OR IMPLIED
+#/
+
+/# Reports whether c is a space, a newline or a tab. #/
+/; is_space (charp c) [bool]
+    /; if (c == ' ')
+        ;return true
+    ;; else if (c == '\n')
+        ;return true
+    ;; else if (c == '\t')
+        ;return true
+    ;/
+
+    ;return false
+;/
+
+/#
+ Reports whether the pending token data dat should be closed off when the
+ character c is read. Always false while dat is empty. Otherwise true
+ exactly when c is whitespace according to is_space.
+#/
+/; break_token ({}charp dat, charp c) [bool]
+    ;return len dat !== 0 && is_space(c)
+;/
+
+/#
+ NOTE(review) - unimplemented stub. The body is empty, so nothing is
+ returned despite the declared return type, and no code in this file
+ calls it.
+#/
+/; parse_reserved ({}charp dat) [{}Token]
+
+;/
+
+/#
+ Reads characters from fstr until read returns -1 and splits them into
+ tokens separated by whitespace (as decided by break_token and is_space).
+
+ fstr - the file to read from
+
+ Returns a pointer to the list of tokens. Every token is currently created
+ with token_type, line and col all set to 0.
+
+ NOTE(review) - the address of the local tmp is stored in each token and
+ the address of the local out is returned. Confirm that both outlive their
+ scope under the memory rules of the language.
+#/
+/; tokenize (tnsl.io.File fstr) [~{}Token]
+    ;{}Token out = {}
+    ;{}charp tdat = {}
+
+    /; loop (int i = fstr.read(); i !== -1) [i = fstr.read()]
+        /; if (break_token(tdat, i))
+            /; if (len tdat > 0)
+                ;{}charp tmp = tdat
+                ;Token ttk = {0, 0, 0, ~tmp}
+                ;out.append(ttk)
+                ;tdat = {}
+            ;/
+        ;; else if ( !is_space(i) )
+            ;tdat.append(i)
+        ;/
+    ;/
+
+    # Prints whatever token data is still pending at end of input.
+    ;tnsl.io.println(tdat)
+
+    # Input that does not end in whitespace leaves one token pending.
+    # Append it so that the last token of the file is not lost.
+    /; if (len tdat > 0)
+        ;{}charp tmp = tdat
+        ;Token ttk = {0, 0, 0, ~tmp}
+        ;out.append(ttk)
+    ;/
+
+    ;return ~out
+;/