/#
	Copyright 2021 Kyle Gunger

	This file is licensed under the CDDL 1.0 (the License)
	and may only be used in accordance with the License.
	You should have received a copy of the License with this
	software/source code. If you did not, a copy can be found
	at the following URL:

	https://opensource.org/licenses/CDDL-1.0

	THIS SOFTWARE/SOURCE CODE IS PROVIDED "AS IS" WITH NO
	WARRANTY, GUARANTEE, OR CLAIM OF FITNESS FOR ANY PURPOSE
	EXPRESS OR IMPLIED
#/

/#
	The various types of tokens.

	These are the values returned by get_token_type:
	  LINESEP - a single character found in LINESEPS
	  INLNSEP - a single character found in INLNSEPS
	  DELIMIT - a character in DELIMITS or a sequence in MDELIMITS
	  AUGMENT - a character in AUGMENTS or a sequence in MAUGMENTS
	  LITERAL - a word found in LITERALS
	  KEYTYPE - a word found in KEYTYPES
	  PREWORD - a word found in PREWORDS
	  KEYWORD - a word found in KEYWORDS
	  DEFWORD - anything else that is not empty
#/
; enum TOKEN_TYPE [int] {
	LINESEP = 0,
	INLNSEP = 1,
	DELIMIT = 2,
	AUGMENT = 3,
	LITERAL = 4,
	KEYTYPE = 5,
	PREWORD = 6,
	KEYWORD = 7,
	DEFWORD = 8
}

/#
	Token struct definition

	token_type - presumably one of the TOKEN_TYPE values (confirm with callers)
	line, col  - presumably the position of the token in its source (confirm with callers)
	data       - the character points that make up the token
#/
; struct Token {
	int
		token_type,
		line,
		col,
	~{}charp
		data
}

/; method Token

	# Release the character data held by the token
	/; operator delete
		;delete self.data
	;/

	# Append every character point of part, in order, to the token data
	/; add_char (~{}charp part)
		# ;uint l = len self.data`
		# ;realloc self.data, l + len part
		/;loop (int i = 0; i < len part`) [i++]
			# ;self.data`{l + i} = part{i}
			;self.data`.append(part`{i})
		;/
	;/
;/

/#
	Reserved words and characters, as well as
	helper funcs for checking their token types.
#/

# "if" and "else" appear in both PREWORDS and KEYWORDS.
# get_token_type checks PREWORDS first, so on their own
# they are reported as PREWORD.
;{}{}charp PREWORDS = {
	"include",
	"define",
	"extern",
	"size",
	"align",
	"address",
	"rootfile",
	"if",
	"else",
	"abi"
}

;const {}{}charp KEYTYPES = {
	"bool",
	"char",
	"charp",

	"int8",
	"int16",
	"int32",
	"int64",
	"int",

	"uint8",
	"uint16",
	"uint32",
	"uint64",
	"uint",

	"float32",
	"float64",
	"float",

	"void",
	"type"
}

;{}{}charp KEYWORDS = {
	"struct",
	"interface",
	"enum",
	"is",
	"extends",

	"loop",
	"continue",
	"break",

	"match",
	"case",
	"default",

	"label",
	"goto",

	"if",
	"else",

	"const",
	"static",
	"volatile",

	"method",
	"override",
	"self",
	"super",
	"operator",

	"raw",
	"asm",
	"inline",

	"delete",

	"module",
	"export"
}

;{}{}charp LITERALS = {
	"true",
	"false"
}

;{}charp RESERVED = "`~!#%^&*()-=+[]{}|;:,.<>/"

;{}charp DELIMITS = "()[]{}"

;{}charp LINESEPS = ";:#"

;{}charp INLNSEPS = ","

# NOTE(review): everything between the start of this string and the
# PREaugmented entries of MAUGMENTS was missing from the file, which
# left a malformed initializer and no MDELIMITS or MAUGMENTS at all.
# The string below is every character of RESERVED that is not already
# claimed by DELIMITS, LINESEPS or INLNSEPS. Confirm against history.
;{}charp AUGMENTS = "~`.&|^><!+-*/%="

# NOTE(review): reconstructed list of multi-character block delimiters.
# Confirm the exact contents against the language specification.
;{}{}charp MDELIMITS = {
	"/;",
	";/",
	"/:",
	":/",
	"/#",
	"#/",
	";;",
	"::",
	";#",
	":#",
	"#;",
	"#:"
}

;{}{}charp MAUGMENTS = {
	# NOTE(review): the first four entries are assumed; they are the
	# base forms of the "!" variants listed further down.
	"==",
	"&&",
	"||",
	"<<",
	">>",

	# PREaugmented augmentors
	"&=",
	"|=",
	"^=",
	"+=",
	"-=",
	"*=",
	"/=",
	"%=",
	"~=",
	"`=",

	# POSTaugmented augmentors
	"!&",
	"!|",
	"!^",
	"!==",
	"!&&",
	"!||",
	"!>",
	"!<",
	">==",
	"<==",

	# Increment and De-increment
	"++",
	"--"
}

# Presumably the length of the longest multi-character reserved
# sequence (for example "!=="); not read anywhere in this file.
;int MAX_MRESERVED = 3

/##
	Checks if the character point p is in the string cmp

	cmp - pointer to the string to search
	p   - the character point to look for

	Returns true on the first match, false if cmp holds no match
	(including when cmp is empty).
#; is_in_string (~{}charp cmp, charp p) [bool]
	/; loop (int i = 0; i < len cmp`) [i++]
		# Compare against the parameter p (this used to read an undeclared name)
		/; if (p == cmp`{i})
			;return true
		;/
	;/
	;return false
;/

/##
	Checks if the string s is in the list cmp

	cmp - pointer to the list of strings to search
	s   - pointer to the string to look for

	Returns true only when some entry of cmp has the same length
	as s and the same character point at every index.
#; is_in_string_list (~{}{}charp cmp, ~{}charp s) [bool]
	/; loop (int i = 0; i < len cmp`) [i++]
		/; if (len s` == len cmp`{i})
			# Track the comparison result explicitly. Previously the
			# return below ran even after a mismatch, so any entry of
			# equal length counted as a match.
			;bool same = true
			/; loop (int j = 0; (j < len s`) && same) [j++]
				/; if (s`{j} !== cmp`{i}{j})
					;same = false
				;/
			;/
			/; if (same)
				;return true
			;/
		;/
	;/
	;return false
;/

/##
	Get the token_type value for a given string of character points

	s - pointer to the string to classify

	Strings longer than one character are checked against PREWORDS,
	KEYTYPES, KEYWORDS, LITERALS, MDELIMITS and MAUGMENTS, in that order.
	Single characters are checked against DELIMITS, LINESEPS, INLNSEPS
	and AUGMENTS, in that order.
	Anything that matches none of them is a DEFWORD.

	Returns a TOKEN_TYPE value, or -1 when s is empty.
#; get_token_type (~{}charp s) [int]
	# Measure the string itself, not the pointer to it
	/; if (len s` > 1)

		/; if (is_in_string_list(~PREWORDS, s))
			;return TOKEN_TYPE.PREWORD
		;; else if (is_in_string_list(~KEYTYPES, s))
			;return TOKEN_TYPE.KEYTYPE
		;; else if (is_in_string_list(~KEYWORDS, s))
			;return TOKEN_TYPE.KEYWORD
		;; else if (is_in_string_list(~LITERALS, s))
			;return TOKEN_TYPE.LITERAL
		;; else if (is_in_string_list(~MDELIMITS, s))
			;return TOKEN_TYPE.DELIMIT
		;; else if (is_in_string_list(~MAUGMENTS, s))
			;return TOKEN_TYPE.AUGMENT
		;/

		;return TOKEN_TYPE.DEFWORD

	;; else if (len s` == 1)

		/; if (is_in_string(~DELIMITS, s`{0}))
			;return TOKEN_TYPE.DELIMIT
		;; else if (is_in_string(~LINESEPS, s`{0}))
			;return TOKEN_TYPE.LINESEP
		;; else if (is_in_string(~INLNSEPS, s`{0}))
			;return TOKEN_TYPE.INLNSEP
		;; else if (is_in_string(~AUGMENTS, s`{0}))
			;return TOKEN_TYPE.AUGMENT
		;/

		;return TOKEN_TYPE.DEFWORD
	;/

	# What, we just produce vacant tokens now?
	# Something has gone wrong.
	;return -1
;/