# Character classes used by the tokenizer.
# MULTI_PARENS: characters that may pair up into two-character block delimiters.
{}uint8 MULTI_PARENS = "/;:#"
# PARENS: single-character delimiters.
{}uint8 PARENS = "()[]{}"
# SEPS: single-character separators.
{}uint8 SEPS = "\n;:,"
# RESERVED: every character that is reserved on its own.
{}uint8 RESERVED = "`~!%^&*()-+=[]{}|;:/?<>.,"
# AUGMENTS: single-character operators.
{}uint8 AUGMENTS = "=~!<>&|^+-*/%`."

{}uint8 WHITESPACE = " \r\n\t"

# All lists of keywords are comma delimited because the compiler does not yet support arrays of strings
{}uint8 CSV_AUGMENTS = "++,--,==,!==,&&,||,^^,<==,>==,!>,!<,~=,`=,%=,^=,&=,*=,!=,|=,/=,<<,>>,!&,!|,!^,len,is"
{}uint8 CSV_KEYWORDS = "if,else,loop,continue,break,return,method,struct,enum,interface,export,module,const,static,volatile,raw,extends,override,asm"
{}uint8 CSV_KEYTYPES = "uint8,uint16,uint32,uint64,uint,int8,int16,int32,int64,int,float32,float64,float,comp32,comp64,comp,vect,bool,type,void"

# Types of tokens
enum TOKEN_TYPE [uint] {
    SEPARATOR = 0,
    DELIMITER = 1,
    AUGMENT = 2,
    KEYTYPE = 3,
    KEYWORD = 4,
    LITERAL = 5,
    DEFWORD = 6
}

# Token structure represents a single token in the program.
# data points at a zero-terminated string holding the token text.
struct Token {
    uint
        _type,
        line,
        column,
    ~uint8
        data
}

# Shortcut methods on the token struct
/; method Token

    # Initialize the data buffer to an empty, zero-terminated string
    /; start
        self.data = _alloc(1)
        self.data{0} = 0
    ;/

    # Append a character to the end of the token.
    # The buffer grows to hold the old text, the new character and the terminator.
    /; append (uint8 ch)
        int ln = cstr_len(self.data)
        self.data = _realloc(self.data, ln + 2)
        self.data{ln} = ch
        self.data{ln + 1} = 0
    ;/

    # Remove the last character from this token.
    # Does nothing when the token is already empty, so that we never
    # write in front of the start of the buffer.
    /; pop
        int ln = cstr_len(self.data)
        /; if (ln > 0)
            self.data{ln - 1} = 0
        ;/
    ;/

    # Copy another token to this token (the text is duplicated into a new buffer).
    # NOTE(review): any buffer self.data pointed at before the call is not
    # released here - confirm that callers only copy into fresh tokens.
    /; copy (Token other)
        self._type = other._type
        self.line = other.line
        self.column = other.column

        self.data = _alloc(cstr_len(other.data) + 1)
        cstr_copy(other.data, self.data)
    ;/

    # Delete the memory associated with this token
    /; _del
        _realloc(self.data, 0)
    ;/

    # Length of the string that this token encodes
    /; _len [int]
        return cstr_len(self.data)
    ;/
;/

{}uint8 tkn_ok = "OK \0", tkn_no = "NO \0", tkn_nl = "\n\0"

# Write one token to file_out on its own line.
# The line is prefixed with "OK " when the token text is found in
# CSV_KEYWORDS and with "NO " otherwise.
/; print_token (Token tok, ~void file_out)
    /; if (in_csv(~CSV_KEYWORDS, tok.data) == true)
        write_to_file(file_out, ~tkn_ok{0})
    ;; else
        write_to_file(file_out, ~tkn_no{0})
    ;/
    write_to_file(file_out, tok.data)
    write_to_file(file_out, ~tkn_nl{0})
;/

# Returns true if the character is whitespace
/; is_whitespace(uint8 c) [bool]
    return contains_char(~WHITESPACE, c)
;/

# Returns true if the character is reserved
/; is_reserved (uint8 c) [bool]
    return contains_char(~RESERVED, c)
;/

# Returns true if the token is a valid reserved token:
#  - one character:  the character is in RESERVED
#  - two characters: both characters are in MULTI_PARENS, or the text is
#                    listed in CSV_AUGMENTS
#  - anything else:  the text is listed in CSV_AUGMENTS
/; tok_reserved (Token tok) [bool]
    /; if (tok._len() == 1)
        return is_reserved(tok.data{0})
    ;; else if (tok._len() == 2)
        /; if (contains_char(~MULTI_PARENS, tok.data{0}) && contains_char(~MULTI_PARENS, tok.data{1}))
            return true
        ;/
        # Not a delimiter pair: fall through so that two character
        # augments ("++", "==", "is", ...) are still looked up below.
    ;/

    return in_csv(~CSV_AUGMENTS, tok.data)
;/

# Returns true if the token is a valid literal value
/; tok_literal (Token tok) [bool]
    # TODO: implement literals
    return false
;/

# Classify a token and return the matching TOKEN_TYPE value.
# Reserved tokens are checked first, then keywords, key types and
# literals. Anything left over is a DEFWORD.
/; get_tok_type(Token tok) [uint]
    /; if (tok_reserved(tok) == true)
        /; if (tok._len() > 1)
            # Multi character reserved token: delimiter pair or augment
            /; if (contains_char(~MULTI_PARENS, tok.data{0}) && contains_char(~MULTI_PARENS, tok.data{1}))
                return TOKEN_TYPE.DELIMITER
            ;/
            return TOKEN_TYPE.AUGMENT
        ;; else if (contains_char(~PARENS, tok.data{0}) == true)
            return TOKEN_TYPE.DELIMITER
        ;; else if (contains_char(~SEPS, tok.data{0}) == true)
            return TOKEN_TYPE.SEPARATOR
        ;; else if (contains_char(~AUGMENTS, tok.data{0}) == true)
            return TOKEN_TYPE.AUGMENT
        ;/
        # A reserved character in none of the classes above ends up
        # at the DEFWORD return at the bottom.
    ;; else if (in_csv(~CSV_KEYWORDS, tok.data) == true)
        return TOKEN_TYPE.KEYWORD
    ;; else if (in_csv(~CSV_KEYTYPES, tok.data) == true)
        return TOKEN_TYPE.KEYTYPE
    ;; else if (tok_literal(tok) == true)
        return TOKEN_TYPE.LITERAL
    ;/

    return TOKEN_TYPE.DEFWORD
;/

# Decide whether character c starts a new token instead of extending tok.
# Placeholder: the token type is computed with and without c appended,
# but the result is not used yet and the method always returns true.
/; break_token(~Token tok, uint8 c) [bool]
    # return true
    uint type_before = get_tok_type(tok`)
    tok`.append(c)
    uint type_after = get_tok_type(tok`)
    tok`.pop()
    bool a = true
    return a
;/

# Read file_in byte by byte and print every token found to file_out.
# Text from a '#' up to the end of the line is skipped. Whitespace ends
# the current token; a newline is additionally printed as its own token.
/; tokenize_file (~void file_in, file_out)
    Token tmp
    tmp.start()

    uint8 buf = 0
    int read_count = 0

    # Start reading at beginning of file
    # NOTE(review): the loop below reads again before buf is looked at,
    # so the byte fetched here seems to be dropped - confirm against
    # the behaviour of _read_byte.
    _read_byte(file_in, ~buf, ~read_count)

    # Read loop.
    /; loop (_read_byte(file_in, ~buf, ~read_count))
        /; if (read_count == 0)
            break
        ;/

        /; if (buf == '#')
            # Comment: skip up to and including the end of line (or end of input)
            /; loop (_read_byte(file_in, ~buf, ~read_count))
                /; if (buf == '\n' || read_count == 0)
                    break
                ;/
            ;/
        ;; else if (break_token(~tmp, buf) == true)
            # Flush the finished token and start a new one
            /; if (tmp._len() > 0)
                print_token(tmp, file_out)
            ;/

            tmp._del()
            tmp.start()

            /; if (is_whitespace(buf) == false)
                tmp.append(buf)
            ;; else if (buf == WHITESPACE{2})
                # WHITESPACE{2} is the newline, which is printed as a token of its own
                tmp.append(WHITESPACE{2})
                print_token(tmp, file_out)
                tmp._del()
                tmp.start()
            ;/
        ;; else
            tmp.append(buf)
        ;/

        # Reset so the check at the top of the loop sees a fresh count
        read_count = 0
    ;/

    # Flush whatever is left once the input ends
    /; if (tmp._len() > 0)
        print_token(tmp, file_out)
    ;/

    tmp._del()
;/

{}uint8 w_SEP = "SEPARATOR\n\0"
{}uint8 w_DEL = "DELIMITER\n\0"
{}uint8 w_AUG = "AUGMENT\n\0"
{}uint8 w_KTP = "KEYTYPE\n\0"
{}uint8 w_KWD = "KEYWORD\n\0"
{}uint8 w_LIT = "LITERAL\n\0"
{}uint8 w_DEF = "DEFWORD\n\0"

# Print the name of a TOKEN_TYPE value followed by a newline.
# Values that match no entry are printed as DEFWORD.
/; print_tok_type(uint tt)
    ~uint8 ptr = ~w_DEF{0}

    /; if (tt == TOKEN_TYPE.SEPARATOR)
        ptr = ~w_SEP{0}
    ;; else if (tt == TOKEN_TYPE.DELIMITER)
        ptr = ~w_DEL{0}
    ;; else if (tt == TOKEN_TYPE.AUGMENT)
        ptr = ~w_AUG{0}
    ;; else if (tt == TOKEN_TYPE.KEYTYPE)
        ptr = ~w_KTP{0}
    ;; else if (tt == TOKEN_TYPE.KEYWORD)
        ptr = ~w_KWD{0}
    ;; else if (tt == TOKEN_TYPE.LITERAL)
        ptr = ~w_LIT{0}
    ;; else if (tt == TOKEN_TYPE.DEFWORD)
        ptr = ~w_DEF{0}
    ;/

    _printf(ptr)
;/

{}uint8 test_multi = "/;\0"
{}uint8 test_paren = "(\0"
{}uint8 test_seps = ",\0"
{}uint8 test_aug = ".\0"
{}uint8 test_maug = "++\0"
{}uint8 test_mkw = "if\0"
{}uint8 test_mkt = "bool\0"
{}uint8 test_def = "main\0"

{}uint8 space = " \0"

# Print each sample token followed by the type get_tok_type assigns to it.
# tk.data points straight at the static test strings, so the token must
# not be passed to append, pop or _del here.
/; tests
    Token tk

    # Delimiter (two character pair)
    tk.data = ~test_multi{0}
    _printf(tk.data)
    _printf(~space{0})
    print_tok_type(get_tok_type(tk))

    # Delimiter (single character)
    tk.data = ~test_paren{0}
    _printf(tk.data)
    _printf(~space{0})
    print_tok_type(get_tok_type(tk))

    # Separator
    tk.data = ~test_seps{0}
    _printf(tk.data)
    _printf(~space{0})
    print_tok_type(get_tok_type(tk))

    # Augment (single character)
    tk.data = ~test_aug{0}
    _printf(tk.data)
    _printf(~space{0})
    print_tok_type(get_tok_type(tk))

    # Augment (multi character)
    tk.data = ~test_maug{0}
    _printf(tk.data)
    _printf(~space{0})
    print_tok_type(get_tok_type(tk))

    # Keyword
    tk.data = ~test_mkw{0}
    _printf(tk.data)
    _printf(~space{0})
    print_tok_type(get_tok_type(tk))

    # Key type
    tk.data = ~test_mkt{0}
    _printf(tk.data)
    _printf(~space{0})
    print_tok_type(get_tok_type(tk))

    # Defword
    tk.data = ~test_def{0}
    _printf(tk.data)
    _printf(~space{0})
    print_tok_type(get_tok_type(tk))
;/