/#
	Copyright 2021 Kyle Gunger

	This file is licensed under the CDDL 1.0 (the License)
	and may only be used in accordance with the License.
	You should have received a copy of the License with this
	software/source code. If you did not, a copy can be found
	at the following URL:

	https://opensource.org/licenses/CDDL-1.0

	THIS SOFTWARE/SOURCE CODE IS PROVIDED "AS IS" WITH NO
	WARRANTY, GUARANTEE, OR CLAIM OF FITNESS FOR ANY PURPOSE
	EXPRESS OR IMPLIED
#/

/#
	Reserved words and characters, as well as
	helper funcs for checking their token types.
#/

# Words checked first by get_token_type (TOKEN_TYPE.PREWORD).
# "size", "if" and "else" also appear in later tables; because this
# table is consulted first, those words are reported as PREWORD.
;{}{}charp PREWORDS = {
	"include",
	"define",
	"extern",
	"size",
	"align",
	"address",
	"rootfile",
	"if",
	"else",
	"abi"
}

# Built-in type names (TOKEN_TYPE.KEYTYPE).
;{}{}charp KEYTYPES = {
	"int8",
	"int16",
	"int32",
	"int64",
	"int",

	"uint8",
	"uint16",
	"uint32",
	"uint64",
	"uint",

	"float32",
	"float64",
	"float",

	"bool",
	"void",
	"type"
}

# Language keywords (TOKEN_TYPE.KEYWORD).
;{}{}charp KEYWORDS = {
	"struct",
	"interface",
	"enum",
	"extends",

	"loop",
	"continue",
	"break",

	"match",
	"case",
	"default",

	"label",
	"goto",

	"if",
	"else",

	"const",
	"static",
	"volatile",

	"method",
	"override",
	"operator",

	"raw",
	"asm",
	"inline",
	"virtual",

	"delete",
	"alloc",
	"salloc",
	"realloc",

	"module",
	"export"
}

# Word-shaped literals (TOKEN_TYPE.LITERAL).
;{}{}charp LITERALS = {
	"true",
	"false",

	"null",

	"self",
	"super"
}

# Every reserved character point.
;{}charp RESERVED = "`~!#%^&*()-=+[]{}|;:,.<>/"

# Single-character delimiters (TOKEN_TYPE.DELIMIT).
;{}charp DELIMITS = "()[]{}"

# Single-character line separators (TOKEN_TYPE.LINESEP).
;{}charp LINESEPS = ";:#"

# Single-character inline separators (TOKEN_TYPE.INLNSEP).
;{}charp INLNSEPS = ","

# Single-character augments (TOKEN_TYPE.AUGMENT).
# NOTE(review): the tail of this string was lost in the file as received
# (only "~`.&|^" survived). It is rebuilt here as every RESERVED character
# that is not in DELIMITS, LINESEPS or INLNSEPS - confirm against upstream.
;{}charp AUGMENTS = "~`.&|^=!+-*/%<>"

# Multi-character delimiters (TOKEN_TYPE.DELIMIT).
# NOTE(review): get_token_type references this table, but its definition
# was missing from the file as received. The code and comment block
# delimiters below are the ones this file itself uses; the preprocessor
# entries are inferred by analogy. TODO: confirm against upstream.
;{}{}charp MDELIMITS = {
	# Code block
	"/;",
	";/",
	";;",

	# Comment block
	"/#",
	"#/",
	"#;",

	# Preprocessor block (inferred)
	"/:",
	":/",
	"::"
}

# Multi-character augments (TOKEN_TYPE.AUGMENT).
# NOTE(review): everything before ">>" was missing from the file as
# received. The first four entries are inferred from the negated forms
# further down ("!==", "!&&", "!||") and from the surviving ">>".
# TODO: confirm against upstream.
;{}{}charp MAUGMENTS = {
	"==",
	"&&",
	"||",

	"<<",
	">>",

	# PREaugmented augmentors
	"&=",
	"|=",
	"^=",
	"+=",
	"-=",
	"*=",
	"/=",
	"%=",
	"~=",
	"`=",

	# POSTaugmented augmentors
	"!&",
	"!|",
	"!^",
	"!==",
	"!&&",
	"!||",
	"!>",
	"!<",
	">==",
	"<==",

	# Increment and De-increment
	"++",
	"--",

	"is",
	"len",
	"size"
}

;int MAX_MRESERVED = 3

/##
	Checks if the character point p is in the string cmp.
	Returns true on the first match, false if cmp has no such element.
#; is_in_string (~{}charp cmp, charp p) [bool]

	/; loop (int i = 0; i < len cmp`) [i++]
		/; if (p == cmp`{i})
			;return true
		;/
	;/

	;return false
;/

/##
	Checks if the string s is in the list cmp.

	An entry matches only when it has the same length as s and every
	character point is equal. The comparison keeps an explicit flag
	instead of breaking out of nested blocks, so the result does not
	depend on how many levels a numbered break exits: a same-length
	entry that differs in some character is never reported as a match.
#; is_in_string_list (~{}{}charp cmp, ~{}charp s) [bool]

	/; loop (int i = 0; i < len cmp`) [i++]
		/; if (len s` == len cmp`{i})
			;bool same = true

			# Stops at the first differing character point.
			/; loop (int j = 0; j < len s` && same) [j++]
				/; if (s`{j} !== cmp`{i}{j})
					;same = false
				;/
			;/

			/; if (same)
				;return true
			;/
		;/
	;/

	;return false
;/

/##
	Checks if dat looks like a numeric literal.

	Empty input is rejected. If dat starts with '0' followed by a
	character that is neither a digit nor '.', it is treated as a
	prefixed (non-decimal) literal: the first two characters are skipped
	and the remaining characters are NOT validated. Otherwise every
	character must be a digit, with at most one '.' allowed.
#; is_numeric_literal (~{}charp dat) [bool]

	/; if (len dat` == 0)
		;return false
	;/

	;bool dec = true, flt = false

	;int i = 0

	/; if (len dat` > 1)
		/; if (dat`{0} == '0' && !is_digit(dat`{1}) && dat`{1} !== '.')
			;dec = false
			;i = 2
		;/
	;/

	/; loop (i < len dat`) [i++]
		# Only decimal literals are checked character by character.
		/; if (dec && !is_digit(dat`{i}))
			/; if (dat`{i} !== '.')
				;return false
			;/

			# A second '.' makes the literal invalid.
			/; if (flt)
				;return false
			;/

			;flt = true
		;/
	;/

	;return true
;/

/##
	Checks if the quoted string dat has been closed.

	dat{0} is taken as the quote character. Returns true when the same
	character appears again later in dat without a preceding unescaped
	backslash. Strings shorter than two character points are never closed.

	The parameter c is not read; it is kept so existing callers still work.
#; string_closed ({}charp dat, charp c) [bool]

	/; if (len dat < 2)
		;return false
	;/

	;charp closing = dat{0}
	;bool escaping = false

	/; loop (int i = 1; i < len dat) [i++]
		/; if (dat{i} == closing && !escaping)
			;return true
		;; else if (dat{i} == '\\' && !escaping)
			;escaping = true
		;; else
			;escaping = false
		;/
	;/

	;return false
;/

/##
	Get the token_type value for a given string of character points.

	Strings longer than one character point are classified, in order, as
	a numeric or quoted literal, then by lookup in PREWORDS, KEYTYPES,
	KEYWORDS, LITERALS, MDELIMITS and MAUGMENTS; anything else is a
	DEFWORD. A single character point is classified as a digit literal,
	then by lookup in DELIMITS, LINESEPS, INLNSEPS and AUGMENTS; anything
	else is a DEFWORD. An empty string returns -1.
#; get_token_type (~{}charp s) [int]

	/; if (len s` > 1)

		/; if (is_numeric_literal(s) || s`{0} == '"' || s`{0} == '\'')
			;return TOKEN_TYPE.LITERAL
		;/

		/; if (is_in_string_list(~PREWORDS, s))
			;return TOKEN_TYPE.PREWORD
		;; else if (is_in_string_list(~KEYTYPES, s))
			;return TOKEN_TYPE.KEYTYPE
		;; else if (is_in_string_list(~KEYWORDS, s))
			;return TOKEN_TYPE.KEYWORD
		;; else if (is_in_string_list(~LITERALS, s))
			;return TOKEN_TYPE.LITERAL
		;; else if (is_in_string_list(~MDELIMITS, s))
			;return TOKEN_TYPE.DELIMIT
		;; else if (is_in_string_list(~MAUGMENTS, s))
			;return TOKEN_TYPE.AUGMENT
		;/

		;return TOKEN_TYPE.DEFWORD

	;; else if (len s` == 1)

		/; if (is_digit(s`{0}))
			;return TOKEN_TYPE.LITERAL
		;/

		/; if (is_in_string(~DELIMITS, s`{0}))
			;return TOKEN_TYPE.DELIMIT
		;; else if (is_in_string(~LINESEPS, s`{0}))
			;return TOKEN_TYPE.LINESEP
		;; else if (is_in_string(~INLNSEPS, s`{0}))
			;return TOKEN_TYPE.INLNSEP
		;; else if (is_in_string(~AUGMENTS, s`{0}))
			;return TOKEN_TYPE.AUGMENT
		;/

		;return TOKEN_TYPE.DEFWORD
	;/

	# What, we just produce vacant tokens now?
	# Something has gone wrong.
	;return -1
;/