# Set to true by the tokenizer when it reports an error
# (see parse_reserved_tokens).
bool HAD_ERROR = false

# A single lexical token.
#   data    - zero-terminated text of the token
#   _type   - one of the TT_ constants declared below
#   line    - line on which the token starts
#   col     - column at which the token starts
#   closing - see the field comment; no function in this file assigns it
struct Token {
    ~uint8 data,
    int _type, line, col,
    int closing # only has meaning for delimiters
}

/; method Token

    # Compares the text of this token with the text of tok and
    # returns whatever utils.strcmp reports for the two strings.
    /; eq (Token tok) [bool]
        return utils.strcmp(self.data, tok.data)
    ;/

    # Compares the text of this token with the zero-terminated string str
    # and returns whatever utils.strcmp reports for the two strings.
    /; eq_str(~uint8 str) [bool]
        return utils.strcmp(self.data, str)
    ;/

    # Builds a printable description of the token in the form
    #   {data, type, line, col}
    # Returns the string produced by the vector's as_cstr.
    # NOTE(review): the caller presumably has to release the returned
    # buffer with _delete - confirm against utils.Vector.
    /; sprint [~uint8]
        utils.Vector out
        out.init(1)

        ~uint8 tmp

        out.push_char('{')
        out.push_cstr(self.data)
        out.push_char(',')
        out.push_char(' ')

        # int_to_str hands back a temporary string which is deleted
        # as soon as it has been copied into the output vector
        tmp = utils.int_to_str(self._type)
        out.push_cstr(tmp)
        _delete(tmp)
        out.push_char(',')
        out.push_char(' ')

        tmp = utils.int_to_str(self.line)
        out.push_cstr(tmp)
        _delete(tmp)
        out.push_char(',')
        out.push_char(' ')

        tmp = utils.int_to_str(self.col)
        out.push_cstr(tmp)
        _delete(tmp)
        out.push_char('}')

        return out.as_cstr()
    ;/
;/

# True for tab, carriage return and space.
# Newline is deliberately not included: the tokenizer turns it into a token.
/; _is_space(uint8 char) [bool]
    /; if (char == '\t' || char == '\r' || char == ' ')
        return true
    ;/
    return false
;/

# True when str is exactly equal to one of the comma separated
# entries of the zero-terminated list csv.
#
# along is the number of characters of str matched so far inside the
# current entry, or a negative value once the current entry has mismatched.
/; _in_csv (~uint8 csv, ~uint8 str) [bool]
    int along = 0

    /; loop (csv` !== 0) [csv++]
        /; if (csv` == ',')
            # end of an entry: it matches only if str was consumed entirely
            /; if (along !< 0 && str{along} == 0)
                return true
            ;/
            along = 0
        ;; else if (along !< 0 && str{along} == csv`)
            along++
        ;; else
            # mismatch: mark the entry as failed until the next comma
            along = 0
            along--
        ;/
    ;/

    # the last entry has no trailing comma, so it is checked here
    return along !< 0 && str{along} == 0
;/

# True when the zero-terminated string str contains the character ch.
/; _str_contains (~uint8 str, uint8 ch) [bool]
    /; loop (str` !== 0) [str++]
        /; if (str` == ch)
            return true
        ;/
    ;/
    return false
;/

# Comma separated word lists searched with _in_csv
~uint8 KEYWORDS = "module,export,asm,if,else,loop,label,goto,continue,break,return,import,as,using,struct,method,interface,enum,implements,operator,is\0"
~uint8 KEYTYPES = "uint8,uint16,uint32,uint64,uint,int8,int16,int32,int64,int,float32,float64,float,bool,vect,void\0"
~uint8 LITERALS = "false,true\0"

# Character sets searched with _str_contains
~uint8 RESERVED = "~`!@#$%^&*()[]{}-+=\"\'\\|;:/?.>,<\0"
~uint8 OPS = "`~!%^&|*-=+./><\0"
~uint8 MULTI_OPS = "==,&&,||,^^,!==,!&&,!||,!^^,!<,!>,<<,>>,!&,!|,!^,++,--,>==,<==,len,!=\0"
~uint8 DELIMS = "()[]{}\0"
~uint8 MULTI_DELIMS = ";:#\0"

# Token type values stored in Token._type
int TT_DEFWORD = 0
int TT_KEYWORD = 1
int TT_KEYTYPE = 2
int TT_LITERAL = 3
int TT_AUGMENT = 4
int TT_DELIMIT = 5
int TT_SPLITTR = 6
int TT_INVALID = 7

# Returns the partner of a single character delimiter from DELIMS.
# DELIMS stores the pairs as opener then closer, so the partner of the
# entry at an odd index is the one before it and the partner of the
# entry at an even index is the one after it.
# Characters that are not in DELIMS are returned unchanged.
/; opposite_delim (uint8 c) [uint8]
    /; loop (int i = 0; DELIMS{i} !== 0) [i++]
        /; if (DELIMS{i} == c)
            /; if (i % 2 == 1)
                return DELIMS{i - 1}
            ;; else
                return DELIMS{i + 1}
            ;/
        ;/
    ;/
    return c
;/

# True when str is a delimiter:
#   - a one character string taken from DELIMS, or
#   - a two character string that either starts with a MULTI_DELIMS
#     character and is followed by the same character (not allowed for
#     the hash character) or by a slash, or is a slash followed by a
#     MULTI_DELIMS character.
/; is_delim (~uint8 str) [bool]
    int l = utils.strlen(str)

    /; if (l == 1 && _str_contains(DELIMS, str`) == true)
        return true
    ;; else if (l == 2)
        /; if (_str_contains(MULTI_DELIMS, str{0}) == true)
            return (str{0} == str{1} && str{0} !== '#') || str{1} == '/'
        ;; else if (_str_contains(MULTI_DELIMS, str{1}) == true)
            return str{0} == '/'
        ;/
    ;/

    return false
;/

# Classifies the zero-terminated string str and returns a TT_ value.
# An empty string is TT_INVALID.  Anything that is not recognised as a
# delimiter, operator, splitter, key type, keyword or literal is
# reported as TT_DEFWORD; this includes single reserved characters that
# are neither operators nor splitters, which parse_reserved_tokens
# relies on to detect where a run of reserved characters must be split.
/; token_type (~uint8 str) [int]
    int l = utils.strlen(str)

    /; if (l < 1)
        return TT_INVALID
    ;/

    /; if (is_delim(str) == true)
        return TT_DELIMIT
    ;; else if (l == 1 && is_reserved(str{0}) == true)
        /; if (_str_contains(OPS, str{0}) == true)
            return TT_AUGMENT
        ;; else if (str` == ',' || str` == ';' || str` == ':')
            return TT_SPLITTR
        ;/
    ;; else if (_in_csv(MULTI_OPS, str) == true)
        return TT_AUGMENT
    ;; else if (_in_csv(KEYTYPES, str) == true)
        return TT_KEYTYPE
    ;; else if (_in_csv(KEYWORDS, str) == true)
        return TT_KEYWORD
    ;; else if (_in_csv(LITERALS, str) == true)
        return TT_LITERAL
    ;/

    return TT_DEFWORD
;/

# True when char is one of the characters listed in RESERVED.
/; is_reserved (uint8 char) [bool]
    return _str_contains(RESERVED, char)
;/

# Creates the splitter token that represents a line break at the current
# position, then moves the position to column 1 of the next line.
# The token text is a freshly allocated two byte string holding a
# newline character and the terminating zero.
/; parse_nl_token (~int line, col) [Token]
    Token out
    out.line = line`
    out.col = col`
    out._type = TT_SPLITTR

    out.data = _alloc(2)
    out.data{0} = '\n'
    out.data{1} = 0

    col` = 1
    line` = line` + 1

    return out
;/

# Skips the rest of the current line.  On return char holds the newline
# that ended the comment, unless the end of the file was reached first.
/; parse_comment (~utils.File fin, ~uint8 char)
    /; loop (fin`.at_end == false && char` !== '\n')
        char` = fin`.read()
    ;/
;/

# Reads a quoted literal.  On entry char holds the opening quote; the
# same character closes the literal.  A backslash always takes the
# following character with it, so an escaped quote does not end the
# literal.  The token text includes both quotes.  Newlines inside the
# literal advance line and restart the column count.
# On return char holds the first character after the closing quote.
/; parse_string_token(~utils.File fin, ~uint8 char, ~int line, col) [Token]
    utils.Vector str
    str.init(1)
    str.push_char(char`)

    uint8 first = char`

    Token out
    out.line = line`
    out.col = col`
    out._type = TT_LITERAL

    char` = fin`.read()
    col`++

    /; loop (char` !== first && fin`.at_end == false)
        /; if (char` == '\\')
            # keep the backslash, then copy the escaped character blindly
            str.push_char(char`)
            char` = fin`.read()
            col`++

            /; if (fin`.at_end == false)
                /; if (char` == '\n')
                    line`++
                    col` = 0
                ;/
                str.push_char(char`)
                char` = fin`.read()
                col`++
            ;/
        ;; else
            /; if (char` == '\n')
                line`++
                col` = 0
            ;/
            str.push_char(char`)
            char` = fin`.read()
            col`++
        ;/
    ;/

    /; if (fin`.at_end == false)
        # Step past the closing quote.  The column has to advance with
        # the read, otherwise every later token on this line is reported
        # one column too far to the left.
        char` = fin`.read()
        col`++
    ;/

    str.push_char(first)
    out.data = str.as_cstr()

    return out
;/

# True for the characters accepted inside a numeric literal:
# decimal digits, the letters a to f in either case, and the dot.
/; in_num_range (uint8 char) [bool]
    bool dec = char !< '0' && char !> '9'
    bool hex = char !< 'a' && char !> 'f'
    bool HEX = char !< 'A' && char !> 'F'
    hex = hex || HEX
    return dec || hex || char == '.'
;/

# Reads a numeric literal.  On entry char holds its first character.
# Reading stops at the first character rejected by in_num_range or at a
# second dot.  On return char holds the character that stopped the scan.
# The line parameter is not used by this function.
/; parse_numeric_token (~utils.File fin, ~uint8 char, ~int line, col) [Token]
    Token out
    out.line = line`
    out.col = col`
    out._type = TT_LITERAL

    utils.Vector num
    num.init(1)
    num.push_char(char`)

    char` = fin`.read()
    col`++

    bool dec = false, ok = true

    /; loop (fin`.at_end == false && ok == true)
        # only a single dot is allowed in one literal
        /; if (char` == '.' && dec == true)
            ok = false
        ;; else if (char` == '.')
            dec = true
        ;/

        /; if (ok == true && in_num_range(char`) == true)
            num.push_char(char`)
            char` = fin`.read()
            col`++
        ;; else
            ok = false
        ;/
    ;/

    out.data = num.as_cstr()

    return out
;/

# Reads a word: everything up to the next newline, space character or
# reserved character.  The type of the resulting token is chosen by
# token_type.  On return char holds the character that ended the word.
# The line parameter is only read, never modified.
/; parse_word_token (~utils.File fin, ~uint8 char, ~int line, col) [Token]
    Token out
    out.line = line`
    out.col = col`

    utils.Vector str
    str.init(1)

    bool ok = true

    /; loop (fin`.at_end == false && ok == true)
        str.push_char(char`)
        char` = fin`.read()
        col`++

        /; if (char` == '\n' || _is_space(char`) == true || is_reserved(char`) == true)
            ok = false
        ;/
    ;/

    out.data = str.as_cstr()
    out._type = token_type(out.data)

    return out
;/

~uint8 ERROR_RESERVED = "unexpected reserved token in file\0"

# Splits a run of reserved characters into tokens and appends them to out.
#
# Characters are added to a candidate one at a time.  As long as
# token_type recognises the candidate it keeps growing.  When it stops
# being recognised:
#   - if the candidate is longer than one character, the last character
#     is taken off again, the shorter candidate is emitted and the
#     removed character starts the next candidate;
#   - if the candidate is a single character, that character is not a
#     valid token at all: HAD_ERROR is set, the error is reported through
#     report_error, the token is emitted once and the next candidate
#     starts empty.
#
# The run ends at the first character that is not reserved, or at a
# quote character, which is left in char for the caller.
/; parse_reserved_tokens (~utils.File fin, ~uint8 char, ~int line, col, ~utils.Vector out)
    Token tmp
    tmp.line = line`
    tmp.col = col`

    utils.Vector res
    res.init(1)

    bool ok = true

    /; loop (fin`.at_end == false && ok == true)
        res.push_char(char`)
        int after = token_type(res.as_cstr())

        /; if (after == TT_DEFWORD)
            bool res_unexpected = true
            /; if (res.count > 1)
                res.pop()
                res_unexpected = false
            ;/

            tmp.data = res.as_cstr()
            tmp._type = token_type(tmp.data)

            /; if (res_unexpected == true)
                HAD_ERROR = true
                report_error(fin`, tmp, ERROR_RESERVED)
            ;/

            out`.push(~tmp)

            # the buffer now belongs to the pushed token, start a new one
            res.init(1)

            /; if (res_unexpected == false)
                # the character that was popped begins the next token
                res.push_char(char`)
                tmp.col = col`
            ;; else
                # The offending character has already been emitted as
                # its own token.  Seeding the next candidate with it
                # again would emit it a second time, so the next token
                # begins at the following character instead.
                tmp.col = col` + 1
            ;/
        ;/

        char` = fin`.read()
        col`++

        /; if (is_reserved(char`) == false || char` == '\"' || char` == '\'')
            ok = false
        ;/
    ;/

    /; if (res.count > 0)
        tmp.data = res.as_cstr()
        tmp._type = token_type(tmp.data)
        out`.push(~tmp)
    ;; else
        # nothing left to emit, release the unused buffer
        res.end()
    ;/
;/

~uint8 RES_LOL = "Reserved %c\n\0"
~uint8 PUSH = "Pushing token %s\n\0"

# Opens fin, converts its whole content into a vector of Token values,
# closes fin and returns the vector.  Every line break is represented by
# a splitter token created by parse_nl_token.  The token texts can be
# released together with the vector through free_token_list.
/; tokenize (~utils.File fin) [utils.Vector]
    # create a tmp token
    Token tok
    tok._type = TT_INVALID

    utils.Vector out, delims

    # init vectors
    out.init(len tok)
    # A stack of delimiters (initialised and released here, not otherwise
    # used by this function)
    delims.init(8)

    # open file for reading
    fin`.open()

    # main counters for line and col
    # Declared as int because every helper receives them as pointers to
    # int and the Token fields they are copied into are int as well.
    int line = 1, col = 1

    # main loop
    uint8 char = fin`.read()

    /; loop (fin`.at_end == false)

        /; if (_is_space(char) == true)
            # skip spaces
            char = fin`.read()
            col++
        ;; else if (char == '#')
            parse_comment(fin, ~char)
        ;; else if (char == '\"' || char == '\'')
            # Generate string literals
            tok = parse_string_token(fin, ~char, ~line, ~col)
        ;; else if (char !< '0' && char !> '9')
            # handle numeric literals
            tok = parse_numeric_token(fin, ~char, ~line, ~col)
        ;; else if (is_reserved(char) == true)
            # pushes its tokens onto out directly
            parse_reserved_tokens(fin, ~char, ~line, ~col, ~out)
        ;; else if (char !== '\n')
            # word tokens
            tok = parse_word_token(fin, ~char, ~line, ~col)
        ;/

        # a helper above produced a token: store it and reset the marker
        /; if (tok._type !== TT_INVALID)
            out.push(~tok)
            tok._type = TT_INVALID
        ;/

        # every helper stops on the newline without consuming it
        /; if (char == '\n')
            tok = parse_nl_token(~line, ~col)
            char = fin`.read()
            out.push(~tok)
            tok._type = TT_INVALID
        ;/
    ;/

    delims.end()

    # done with file
    fin`.close()

    return out
;/

# Deletes the text of every token stored in vec and then releases the
# vector itself through its end method.
/; free_token_list (~utils.Vector vec)
    ~Token t

    /; loop (int i = 0; i < vec`.count) [i++]
        t = vec`.get(i)
        _delete(t`.data)
    ;/

    vec`.end()
;/