summaryrefslogtreecommitdiff
path: root/tnslc/src/parse
diff options
context:
space:
mode:
authorKyle Gunger <kgunger12@gmail.com>2021-08-22 15:25:54 -0400
committerKyle Gunger <kgunger12@gmail.com>2021-08-22 15:25:54 -0400
commit3add402da9fc5b574f34e37e951779212ce28ed1 (patch)
treea081a2331923dce6ee29c87285be901d58b0bc4b /tnslc/src/parse
parent11f9c56ae3861e32ac45785e9f30ed5f4c19ea32 (diff)
Begin port of tnsl-parse code to native TNSL
Diffstat (limited to 'tnslc/src/parse')
-rw-r--r--tnslc/src/parse/parse.tnsl5
-rw-r--r--tnslc/src/parse/token.tnsl259
-rw-r--r--tnslc/src/parse/tokenizer.tnsl25
3 files changed, 286 insertions, 3 deletions
diff --git a/tnslc/src/parse/parse.tnsl b/tnslc/src/parse/parse.tnsl
index dc6b9a2..e10ab4c 100644
--- a/tnslc/src/parse/parse.tnsl
+++ b/tnslc/src/parse/parse.tnsl
@@ -15,5 +15,8 @@
#/
/; export module parse
- :import 'token.tnsl'
+ /:import
+ "token.tnsl"
+ "tokenizer.tnsl"
+ :/
;/ \ No newline at end of file
diff --git a/tnslc/src/parse/token.tnsl b/tnslc/src/parse/token.tnsl
index 92b2ca4..a841f58 100644
--- a/tnslc/src/parse/token.tnsl
+++ b/tnslc/src/parse/token.tnsl
@@ -14,13 +14,27 @@
EXPRESS OR IMPLIED
#/
-;struct Token {
+/#
+ The various types of tokens, stored as uint values.
+
+ As assigned by get_token_type
+ LINESEP - a single character found in LINESEPS
+ INLNSEP - a single character found in INLNSEPS
+ DELIMIT - a single character found in DELIMITS, or an entry of MDELIMITS
+ AUGMENT - a single character found in AUGMENTS, or an entry of MAUGMENTS
+ LITERAL - an entry of LITERALS
+ KEYTYPE - an entry of KEYTYPES
+ PREWORD - an entry of PREWORDS
+ KEYWORD - an entry of KEYWORDS
+ DEFWORD - any other string of one or more elements
+#/
+; enum TOKEN_TYPE [uint] {
+ LINESEP = 0,
+ INLNSEP = 1,
+ DELIMIT = 2,
+ AUGMENT = 3,
+ LITERAL = 4,
+ KEYTYPE = 5,
+ PREWORD = 6,
+ KEYWORD = 7,
+ DEFWORD = 8
+}
+
+/#
+ Token struct definition.
+
+ Holds three uint fields (type, line, char) and the field data,
+ declared with type ~{}charp. The delete operator shown after this
+ struct deletes data.
+
+ NOTE(review) - type presumably holds a TOKEN_TYPE value, and line and
+ char presumably give the position of the token in its source file.
+ Confirm against the tokenizer once it is written.
+#/
+;raw struct Token {
uint
type,
line,
char,
- ~{}char
+ ~{}charp
data
}
@@ -29,4 +43,245 @@
/; operator delete
;delete this.data
;/
+;/
+
+/#
+ Reserved words and characters, as well as
+ helper funcs for checking their token types.
+#/
+
+/#
+ Words that get_token_type reports as TOKEN_TYPE.PREWORD.
+
+ NOTE(review) - the words if and else are also listed in KEYWORDS.
+ get_token_type tests PREWORDS first, so it never reports those two
+ words as TOKEN_TYPE.KEYWORD. Confirm that this is intended.
+#/
+;const {}{}charp PREWORDS = {
+ "include",
+ "define",
+ "extern",
+ "size",
+ "align",
+ "address",
+ "rootfile",
+ "if",
+ "else",
+ "abi"
+}
+
+/#
+ Type names that get_token_type reports as TOKEN_TYPE.KEYTYPE.
+ Grouped as boolean and character types, integer types,
+ floating point types, then void and type.
+#/
+;const {}{}charp KEYTYPES = {
+ "bool",
+ "char",
+ "charp",
+
+ "int8",
+ "int16",
+ "int32",
+ "int64",
+ "int",
+ "uint8",
+ "uint16",
+ "uint32",
+ "uint64",
+ "uint",
+
+ "float32",
+ "float64",
+ "float",
+
+ "void",
+ "type"
+}
+
+/#
+ Words that get_token_type reports as TOKEN_TYPE.KEYWORD.
+
+ The list ends without a trailing comma, matching every other
+ list in this file.
+
+ NOTE(review) - the words if and else are also listed in PREWORDS,
+ which get_token_type tests first, so those two entries are never
+ reported as TOKEN_TYPE.KEYWORD. Confirm that this is intended.
+#/
+;const {}{}charp KEYWORDS = {
+ "struct",
+ "interface",
+ "enum",
+ "is",
+ "extends",
+
+ "loop",
+ "continue",
+ "break",
+
+ "match",
+ "case",
+ "default",
+
+ "label",
+ "goto",
+
+ "if",
+ "else",
+
+ "const",
+ "static",
+ "volatile",
+
+ "method",
+ "override",
+ "self",
+ "super",
+ "operator",
+
+ "raw",
+ "asm",
+ "inline",
+
+ "delete",
+
+ "module",
+ "export"
+}
+
+/#
+ Literal words that get_token_type reports as TOKEN_TYPE.LITERAL.
+ Only the two boolean words are listed here.
+#/
+;const {}{}charp LITERALS = {
+ "true",
+ "false"
+}
+
+/#
+ Single character reserved sets used by get_token_type.
+
+ DELIMITS maps to TOKEN_TYPE.DELIMIT, LINESEPS to TOKEN_TYPE.LINESEP,
+ INLNSEPS to TOKEN_TYPE.INLNSEP and AUGMENTS to TOKEN_TYPE.AUGMENT.
+
+ AUGMENTS includes the assignment character so that a lone equals
+ sign is reported as an augment, in line with the compound forms in
+ MAUGMENTS, instead of falling through to TOKEN_TYPE.DEFWORD.
+#/
+;const {}charp DELIMITS = "()[]{}"
+;const {}charp LINESEPS = ";:#"
+;const {}charp INLNSEPS = ","
+;const {}charp AUGMENTS = "~`.&|^><!+-*/%="
+
+/#
+ Two character delimiters. get_token_type reports each entry
+ as TOKEN_TYPE.DELIMIT.
+#/
+;const {}{}charp MDELIMITS = {
+ # Code block
+ "/;",
+ ";/",
+
+ # Comment block
+ "/#",
+ "#/",
+
+ # Preproc block
+ "/:",
+ ":/",
+
+ # Redef blocks
+ ";;",
+ "::",
+ ";#",
+ ":#",
+ "#;",
+ "#:"
+}
+
+/#
+ Multi character augments. get_token_type reports each entry
+ as TOKEN_TYPE.AUGMENT.
+#/
+;const {}{}charp MAUGMENTS = {
+ # Boolean
+ "==",
+ "&&",
+ "||",
+
+ # Bitwise shifts
+ "<<",
+ ">>",
+
+ # Augments followed by an equals sign
+ "&=",
+ "|=",
+ "^=",
+ "+=",
+ "-=",
+ "*=",
+ "/=",
+ "%=",
+ "~=",
+ "`=",
+
+ # Augments led by an exclamation mark, plus those ending in a double equals sign
+ "!&",
+ "!|",
+ "!^",
+ "!==",
+ "!&&",
+ "!||",
+ "!>",
+ "!<",
+ ">==",
+ "<==",
+
+ # Increment and decrement
+ "++",
+ "--"
+}
+
+/#
+ Equals the length of the longest entries in MDELIMITS and MAUGMENTS,
+ which are three elements long.
+ NOTE(review) - presumably the upper bound on how far the tokenizer
+ looks ahead for a reserved sequence. Confirm once the tokenizer uses it.
+#/
+; const uint MAX_MRESERVED = 3
+
+/##
+ Checks if the character point p is in the string cmp.
+
+ cmp is the set of character points to search and p is the character
+ point to look for. Returns true at the first element of cmp that is
+ equal to p, and false when no element is equal to p, which includes
+ the case of an empty cmp.
+
+#; is_in_string (`const {}charp cmp, charp p) [bool]
+
+ /; for (int i = 0; i < len cmp) [i++]
+
+ # Compare each element of cmp with the parameter p
+ /; if (p == cmp{i})
+ ;return true
+ ;/
+ ;/
+
+ ;return false
+;/
+
+
+/##
+ Checks if the string s is in the list cmp.
+
+ An entry of cmp matches when it has the same length as s and no
+ element differs. Returns true at the first matching entry and
+ false when no entry matches.
+
+#; is_in_string_list (`const {}{}charp cmp, `{}charp s) [bool]
+
+ /; for (int i = 0; i < len cmp) [i++]
+
+ # Only entries with the same length as s are compared element by element
+ /; if (len s == len cmp{i})
+
+ /; for (int j = 0; j < len s) [j++]
+
+ # On the first differing element, give up on this entry
+ /; if (s{j} !== cmp{i}{j})
+ ;goto cont_outer
+ ;/
+ ;/
+
+ # No element differed, so s is in the list
+ ;return true
+ ;/
+
+ # Jump target at the end of the outer loop body, reached by the goto above
+ ;label cont_outer
+ ;/
+
+ ;return false
+;/
+
+/#
+ Get the token_type value for a given string of character points
+
+ Strings longer than one element are looked up in PREWORDS, KEYTYPES,
+ KEYWORDS, LITERALS, MDELIMITS and MAUGMENTS, in that order. Strings
+ of exactly one element are looked up in DELIMITS, LINESEPS, INLNSEPS
+ and AUGMENTS, in that order. The first hit decides the TOKEN_TYPE
+ value that is returned, and a string with no hit at all is reported
+ as TOKEN_TYPE.DEFWORD. Any other length (an empty string) returns -1.
+
+ NOTE(review) - the words if and else are in both PREWORDS and KEYWORDS,
+ so the earlier PREWORDS test always wins for them. Confirm intended.
+ NOTE(review) - the return type is int while TOKEN_TYPE is declared
+ with uint, and -1 is not a member of TOKEN_TYPE. Confirm that callers
+ handle the -1 result.
+
+#; get_token_type (`{}charp s) [int]
+
+ /; if (len s > 1)
+
+ # Multi element strings are checked against the word and sequence lists
+ /; if (is_in_string_list(~PREWORDS, ~s))
+ ;return TOKEN_TYPE.PREWORD
+ ;; else if (is_in_string_list(~KEYTYPES, ~s))
+ ;return TOKEN_TYPE.KEYTYPE
+ ;; else if (is_in_string_list(~KEYWORDS, ~s))
+ ;return TOKEN_TYPE.KEYWORD
+ ;; else if (is_in_string_list(~LITERALS, ~s))
+ ;return TOKEN_TYPE.LITERAL
+ ;; else if (is_in_string_list(~MDELIMITS, ~s))
+ ;return TOKEN_TYPE.DELIMIT
+ ;; else if (is_in_string_list(~MAUGMENTS, ~s))
+ ;return TOKEN_TYPE.AUGMENT
+ ;/
+
+ # Not in any list, so treat it as a defined word
+ ;return TOKEN_TYPE.DEFWORD
+
+ ;;else if (len s == 1)
+
+ # Single element strings are checked against the reserved character sets
+ /; if (is_in_string(~DELIMITS, s{0}))
+ ;return TOKEN_TYPE.DELIMIT
+ ;; else if (is_in_string(~LINESEPS, s{0}))
+ ;return TOKEN_TYPE.LINESEP
+ ;; else if (is_in_string(~INLNSEPS, s{0}))
+ ;return TOKEN_TYPE.INLNSEP
+ ;; else if (is_in_string(~AUGMENTS, s{0}))
+ ;return TOKEN_TYPE.AUGMENT
+ ;/
+
+ # Not a reserved character, so treat it as a defined word
+ ;return TOKEN_TYPE.DEFWORD
+ ;/
+
+ # What, we just produce vacant tokens now?
+ # Something has gone wrong.
+
+ ;return -1
;/ \ No newline at end of file
diff --git a/tnslc/src/parse/tokenizer.tnsl b/tnslc/src/parse/tokenizer.tnsl
new file mode 100644
index 0000000..ec34d83
--- /dev/null
+++ b/tnslc/src/parse/tokenizer.tnsl
@@ -0,0 +1,25 @@
+/#
+ Copyright 2021 Kyle Gunger
+
+ This file is licensed under the CDDL 1.0 (the License)
+ and may only be used in accordance with the License.
+ You should have received a copy of the License with this
+ software/source code. If you did not, a copy can be found
+ at the following URL:
+
+ https://opensource.org/licenses/CDDL-1.0
+
+ THIS SOFTWARE/SOURCE CODE IS PROVIDED "AS IS" WITH NO
+ WARRANTY, GUARANTEE, OR CLAIM OF FITNESS FOR ANY PURPOSE
+ EXPRESS OR IMPLIED
+#/
+
+
+/##
+ parse.numeric_literal tokenizes the next numeric literal value in a file.
+ Returns a token with the proper data as well as the number of characters read
+
+ NOTE(review) - the body is still empty, so nothing is tokenized or
+ returned yet, and the function takes no parameter that names the file
+ to read from. Both need to be filled in as the port continues.
+
+#; numeric_literal () [Token, uint]
+
+
+;/ \ No newline at end of file