summaryrefslogtreecommitdiff
path: root/tnslc/compile/tokenizer.tnsl
diff options
context:
space:
mode:
authorKyle Gunger <kgunger12@gmail.com>2024-07-19 03:21:39 -0400
committerKyle Gunger <kgunger12@gmail.com>2024-07-19 03:21:39 -0400
commit5d688b4da97da2c2f684940147478f12d1f2baba (patch)
treede7f4dff152249587790c2b07149769faaefaa37 /tnslc/compile/tokenizer.tnsl
parent34e3d4f52264cf707f7e73a8a4167f37eee812d9 (diff)
switch tokenization scheme
Diffstat (limited to 'tnslc/compile/tokenizer.tnsl')
-rw-r--r--tnslc/compile/tokenizer.tnsl455
1 files changed, 0 insertions, 455 deletions
diff --git a/tnslc/compile/tokenizer.tnsl b/tnslc/compile/tokenizer.tnsl
deleted file mode 100644
index 30fc8e8..0000000
--- a/tnslc/compile/tokenizer.tnsl
+++ /dev/null
@@ -1,455 +0,0 @@
-# Global error flag. parse_reserved_tokens sets it to true when it reports
-# an unexpected reserved token; nothing in this file resets it.
-bool HAD_ERROR = false
-
-# One lexical token: its text, its TT_* category and where it started.
-struct Token {
- ~uint8 data, # null-terminated token text; released with _delete in free_token_list
- int
- _type, # one of the TT_* constants
- line, # line counter value when the token was started
- col, # column counter value when the token was started
- int
- closing # only has meaning for delimiters
-}
-
-# Methods on Token: text comparison helpers and a debug formatter.
-/; method Token
-    # Compare this token's text with another token's text via utils.strcmp.
-    /; eq (Token tok) [bool]
-        return utils.strcmp(self.data, tok.data)
-    ;/
-
-    # Compare this token's text with a null-terminated string via utils.strcmp.
-    /; eq_str(~uint8 str) [bool]
-        return utils.strcmp(self.data, str)
-    ;/
-
-    # Build the string "{<data>, <_type>, <line>, <col>}".
-    # The returned buffer comes from the local vector's as_cstr().
-    /; sprint [~uint8]
-        utils.Vector buf
-        buf.init(1)
-
-        # opening brace followed by the raw token text
-        buf.push_char('{')
-        buf.push_cstr(self.data)
-
-        # ", " + token type
-        buf.push_char(',')
-        buf.push_char(' ')
-        ~uint8 digits = utils.int_to_str(self._type)
-        buf.push_cstr(digits)
-        _delete(digits)
-
-        # ", " + line
-        buf.push_char(',')
-        buf.push_char(' ')
-        digits = utils.int_to_str(self.line)
-        buf.push_cstr(digits)
-        _delete(digits)
-
-        # ", " + column
-        buf.push_char(',')
-        buf.push_char(' ')
-        digits = utils.int_to_str(self.col)
-        buf.push_cstr(digits)
-        _delete(digits)
-
-        buf.push_char('}')
-
-        return buf.as_cstr()
-    ;/
-;/
-
-# True when `char` is a tab, carriage return or space.
-# '\n' is not matched by this function.
-/; _is_space(uint8 char) [bool]
-    return char == ' ' || char == '\r' || char == '\t'
-;/
-
-# Return true when `str` is exactly equal to one of the comma-separated
-# entries of the null-terminated list `csv`.
-#
-# `along` is the number of characters of `str` matched so far in the
-# current entry, or a negative value once the current entry has mismatched.
-/; _in_csv (~uint8 csv, ~uint8 str) [bool]
- int along = 0
-
- /; loop (csv` !== 0) [csv++]
- /; if (csv` == ',')
- # end of an entry: it matches only if all of str was consumed
- /; if (along !< 0 && str{along} == 0)
- return true
- ;/
- # start matching the next entry from the beginning of str
- along = 0
- ;; else if (along !< 0 && str{along} == csv`)
- along++
- ;; else
- # mismatch: leave along negative so the rest of this entry is skipped
- along = 0
- along--
- ;/
- ;/
-
- # the last entry has no trailing comma, so check it here
- return along !< 0 && str{along} == 0
-;/
-
-# True when the byte `ch` occurs in the null-terminated string `str`.
-/; _str_contains (~uint8 str, uint8 ch) [bool]
-    /; loop (int idx = 0; str{idx} !== 0) [idx++]
-        /; if (str{idx} == ch)
-            return true
-        ;/
-    ;/
-    return false
-;/
-
-# Comma-separated word lists; token_type searches them with _in_csv.
-~uint8 KEYWORDS = "module,export,asm,if,else,loop,label,goto,continue,break,return,import,as,using,struct,method,interface,enum,implements,operator,is\0"
-~uint8 KEYTYPES = "uint8,uint16,uint32,uint64,uint,int8,int16,int32,int64,int,float32,float64,float,bool,vect,void\0"
-~uint8 LITERALS = "false,true\0"
-
-# Every character is_reserved treats as reserved (searched with _str_contains).
-~uint8 RESERVED = "~`!@#$%^&*()[]{}-+=\"\'\\|;:/?.>,<\0"
-
-# OPS: single characters that token_type classifies as TT_AUGMENT.
-# MULTI_OPS: comma-separated multi-character operators, also TT_AUGMENT.
-~uint8 OPS = "`~!%^&|*-=+./><\0"
-~uint8 MULTI_OPS = "==,&&,||,^^,!==,!&&,!||,!^^,!<,!>,<<,>>,!&,!|,!^,++,--,>==,<==,len,!=\0"
-
-# DELIMS: single-character delimiters stored as adjacent open/close pairs
-# (opposite_delim depends on that ordering).
-# MULTI_DELIMS: characters is_delim accepts as part of a two-character delimiter.
-~uint8 DELIMS = "()[]{}\0"
-~uint8 MULTI_DELIMS = ";:#\0"
-
-# Token categories stored in Token._type.
-int TT_DEFWORD = 0 # fallback returned by token_type when nothing else matches
-int TT_KEYWORD = 1 # entry of KEYWORDS
-int TT_KEYTYPE = 2 # entry of KEYTYPES
-int TT_LITERAL = 3 # entry of LITERALS, or a string / numeric literal
-int TT_AUGMENT = 4 # operator (OPS or MULTI_OPS)
-int TT_DELIMIT = 5 # anything is_delim accepts
-int TT_SPLITTR = 6 # ',', ';', ':' and the newline token
-int TT_INVALID = 7 # empty text; tokenize also uses it to mean "no pending token"
-
-# Return the partner of a single-character delimiter from DELIMS
-# (e.g. '(' <-> ')'). Any character not in DELIMS is returned unchanged.
-/; opposite_delim (uint8 c) [uint8]
-    int idx = 0
-    /; loop (DELIMS{idx} !== 0) [idx++]
-        # odd positions hold closers, even positions hold openers
-        /; if (DELIMS{idx} == c && idx % 2 == 1)
-            return DELIMS{idx - 1}
-        ;; else if (DELIMS{idx} == c)
-            return DELIMS{idx + 1}
-        ;/
-    ;/
-    return c
-;/
-
-# True when `str` is a delimiter:
-#   - one character taken from DELIMS, or
-#   - two characters: ";;" or "::", a MULTI_DELIMS character followed
-#     by '/', or '/' followed by a MULTI_DELIMS character.
-/; is_delim (~uint8 str) [bool]
-    int l = utils.strlen(str)
-
-    /; if (l == 1)
-        return _str_contains(DELIMS, str`)
-    ;/
-
-    # only two-character strings can still qualify
-    /; if (l !== 2)
-        return false
-    ;/
-
-    /; if (_str_contains(MULTI_DELIMS, str{0}) == true)
-        # doubled character (but never "##"), or a closing form such as ";/"
-        return (str{0} == str{1} && str{0} !== '#') || str{1} == '/'
-    ;/
-
-    /; if (_str_contains(MULTI_DELIMS, str{1}) == true)
-        # opening form such as "/;"
-        return str{0} == '/'
-    ;/
-
-    return false
-;/
-
-# Classify the null-terminated text `str` and return one of the TT_* constants.
-# Empty text is TT_INVALID; anything not recognised is TT_DEFWORD.
-/; token_type (~uint8 str) [int]
-    int l = utils.strlen(str)
-
-    /; if (l < 1)
-        return TT_INVALID
-    ;/
-
-    /; if (is_delim(str) == true)
-        return TT_DELIMIT
-    ;/
-
-    # a lone reserved character is an operator, a splitter, or unknown
-    /; if (l == 1 && is_reserved(str{0}) == true)
-        /; if (_str_contains(OPS, str{0}) == true)
-            return TT_AUGMENT
-        ;/
-        /; if (str` == ',' || str` == ';' || str` == ':')
-            return TT_SPLITTR
-        ;/
-        return TT_DEFWORD
-    ;/
-
-    # multi-character operators, then the word lists
-    /; if (_in_csv(MULTI_OPS, str) == true)
-        return TT_AUGMENT
-    ;/
-    /; if (_in_csv(KEYTYPES, str) == true)
-        return TT_KEYTYPE
-    ;/
-    /; if (_in_csv(KEYWORDS, str) == true)
-        return TT_KEYWORD
-    ;/
-    /; if (_in_csv(LITERALS, str) == true)
-        return TT_LITERAL
-    ;/
-
-    return TT_DEFWORD
-;/
-
-# True when `char` is one of the characters listed in RESERVED.
-/; is_reserved (uint8 char) [bool]
-    bool found = _str_contains(RESERVED, char)
-    return found
-;/
-
-# Build the TT_SPLITTR token for a newline at the current position, then
-# move the counters to the start of the next line (line + 1, col = 1).
-# The token text is a freshly allocated "\n" string.
-/; parse_nl_token (~int line, col) [Token]
-    Token nl
-    nl._type = TT_SPLITTR
-    nl.line = line`
-    nl.col = col`
-
-    # two bytes: the newline itself and the terminator
-    nl.data = _alloc(2)
-    nl.data{0} = '\n'
-    nl.data{1} = 0
-
-    line`++
-    col` = 1
-
-    return nl
-;/
-
-# Skip the rest of a comment: keep reading from `fin` until `char` holds
-# a newline or the file is at its end. The newline is left in `char`
-# for the caller; the column counter is not touched here.
-/; parse_comment (~utils.File fin, ~uint8 char)
-    /; loop (char` !== '\n' && fin`.at_end == false)
-        char` = fin`.read()
-    ;/
-;/
-
-# Read a quoted literal whose opening quote is currently in `char`.
-#
-#   fin        file being read
-#   char       current character; on return holds the character after the
-#              closing quote (when the file has not ended)
-#   line, col  position counters, advanced as characters are consumed
-#
-# Returns a TT_LITERAL token positioned at the opening quote. Its text is
-# the literal including both quotes; a backslash and the character after
-# it are copied verbatim, so an escaped quote does not end the literal.
-# The closing quote is appended even if the file ended first.
-/; parse_string_token(~utils.File fin, ~uint8 char, ~int line, col) [Token]
-    Token out
-    out.line = line`
-    out.col = col`
-    out._type = TT_LITERAL
-
-    # remember which quote opened the literal so the same one closes it
-    uint8 first = char`
-
-    utils.Vector str
-    str.init(1)
-    str.push_char(first)
-
-    char` = fin`.read()
-    col`++
-
-    /; loop (char` !== first && fin`.at_end == false)
-        # escape: copy the backslash, then fall through so the escaped
-        # character is copied by the shared code below
-        /; if (char` == '\\')
-            str.push_char(char`)
-            char` = fin`.read()
-            col`++
-        ;/
-
-        /; if (fin`.at_end == false)
-            /; if (char` == '\n')
-                # col is bumped to 1 by the increment below
-                line`++
-                col` = 0
-            ;/
-            str.push_char(char`)
-            char` = fin`.read()
-            col`++
-        ;/
-    ;/
-
-    # step past the closing quote; the column must advance with it, as it
-    # does for every other character consumed
-    /; if (fin`.at_end == false)
-        char` = fin`.read()
-        col`++
-    ;/
-
-    str.push_char(first)
-
-    out.data = str.as_cstr()
-
-    return out
-;/
-
-# True when `char` may appear inside a numeric literal:
-# a decimal digit, a hex digit a-f / A-F, or '.'.
-/; in_num_range (uint8 char) [bool]
-    /; if (char == '.')
-        return true
-    ;/
-    /; if (char !< '0' && char !> '9')
-        return true
-    ;/
-    /; if (char !< 'a' && char !> 'f')
-        return true
-    ;/
-    /; if (char !< 'A' && char !> 'F')
-        return true
-    ;/
-    return false
-;/
-
-# Read a numeric literal whose first character is currently in `char`.
-# Characters are taken while they satisfy in_num_range; a second '.'
-# ends the literal. On return `char` holds the first character that was
-# not consumed and `col` has advanced once per character read.
-# Returns a TT_LITERAL token positioned at the first character.
-/; parse_numeric_token (~utils.File fin, ~uint8 char, ~int line, col) [Token]
-    utils.Vector num
-    num.init(1)
-    num.push_char(char`)
-
-    Token out
-    out._type = TT_LITERAL
-    out.line = line`
-    out.col = col`
-
-    char` = fin`.read()
-    col`++
-
-    bool seen_dot = false
-    bool going = true
-
-    /; loop (fin`.at_end == false && going == true)
-        /; if (char` == '.')
-            # only one '.' is allowed per literal
-            /; if (seen_dot == true)
-                going = false
-            ;/
-            seen_dot = true
-        ;/
-
-        /; if (going == true && in_num_range(char`) == false)
-            going = false
-        ;/
-
-        /; if (going == true)
-            num.push_char(char`)
-            char` = fin`.read()
-            col`++
-        ;/
-    ;/
-
-    out.data = num.as_cstr()
-
-    return out
-;/
-
-# Read a word starting at the character currently in `char`.
-# Characters are collected until a newline, a space (_is_space) or a
-# reserved character (is_reserved) is read, or the file ends. On return
-# `char` holds the character that stopped the word. `line` is read but
-# not modified. The token type comes from token_type on the collected text.
-/; parse_word_token (~utils.File fin, ~uint8 char, ~int line, col) [Token]
-    utils.Vector word
-    word.init(1)
-
-    Token out
-    out.line = line`
-    out.col = col`
-
-    bool more = true
-
-    /; loop (fin`.at_end == false && more == true)
-        word.push_char(char`)
-
-        char` = fin`.read()
-        col`++
-
-        # any of these ends the word; the character is left for the caller
-        /; if (char` == '\n')
-            more = false
-        ;; else if (_is_space(char`) == true)
-            more = false
-        ;; else if (is_reserved(char`) == true)
-            more = false
-        ;/
-    ;/
-
-    out.data = word.as_cstr()
-    out._type = token_type(out.data)
-    return out
-;/
-
-# Message passed to report_error by parse_reserved_tokens.
-~uint8 ERROR_RESERVED = "unexpected reserved token in file\0"
-
-# Split a run of reserved characters into tokens and push them onto `out`.
-#
-# Characters are accumulated in `res` for as long as the accumulated text
-# is still classified by token_type as something other than TT_DEFWORD.
-# When adding a character makes the text unrecognised, the text before
-# that character is emitted and accumulation restarts.
-# The run ends at the first non-reserved character or at a quote
-# character, which is left in `char` for the caller.
-#
-#   fin        file being read
-#   char       current character (in/out)
-#   line, col  position counters; only col is advanced here
-#   out        vector that receives the tokens (pushed by address of tmp)
-/; parse_reserved_tokens (~utils.File fin, ~uint8 char, ~int line, col, ~utils.Vector out)
- Token tmp
- tmp.line = line`
- tmp.col = col`
-
- utils.Vector res
- res.init(1)
-
- bool ok = true
-
- /; loop (fin`.at_end == false && ok == true)
-
- res.push_char(char`)
- int after = token_type(res.as_cstr())
-
- /; if (after == TT_DEFWORD)
- # the text with the new character is not a known reserved token
- bool res_unexpected = true
- /; if (res.count > 1)
- # drop the new character; what remains was recognised earlier
- res.pop()
- res_unexpected = false
- ;/
-
- # presumably the buffer returned by as_cstr() now belongs to the
- # token, since res is re-initialised below - verify against utils.Vector
- tmp.data = res.as_cstr()
- tmp._type = token_type(tmp.data)
-
- /; if (res_unexpected == true)
- # a single reserved character that is not a valid token by itself
- HAD_ERROR = true
- report_error(fin`, tmp, ERROR_RESERVED)
- ;/
-
- out`.push(~tmp)
-
- # start a new accumulation beginning with the current character
- # NOTE(review): in the res_unexpected case the same character was
- # already emitted in tmp above, so it looks like it is pushed into
- # res a second time here - confirm whether that is intended
- res.init(1)
- res.push_char(char`)
- tmp.col = col`
- ;/
-
- char` = fin`.read()
- col`++
-
- # stop on anything that is not reserved; quotes are reserved but start
- # a string literal, so they end the run as well
- /; if (is_reserved(char`) == false || char` == '\"' || char` == '\'')
- ok = false
- ;/
- ;/
-
- # emit whatever is still accumulated, or release the unused vector
- /; if (res.count > 0)
- tmp.data = res.as_cstr()
- tmp._type = token_type(tmp.data)
- out`.push(~tmp)
- ;; else
- res.end()
- ;/
-;/
-
-# printf-style format strings; neither is referenced anywhere in this file
-# (presumably left over from debugging - verify before removing).
-~uint8 RES_LOL = "Reserved %c\n\0"
-~uint8 PUSH = "Pushing token %s\n\0"
-
-# Tokenize the whole of `fin` and return the tokens as a Vector of Token.
-#
-# The file is opened here and closed before returning. Each iteration
-# dispatches on the current character to one of the parse_* helpers,
-# which consume input and leave the next unread character in `char`.
-# Newlines are emitted as TT_SPLITTR tokens by parse_nl_token.
-# The token text buffers can be released with free_token_list.
-/; tokenize (~utils.File fin) [utils.Vector]
-    # scratch token; TT_INVALID means "nothing waiting to be pushed"
-    Token tok
-    tok._type = TT_INVALID
-
-    # output vector, sized for Token elements
-    utils.Vector out
-    out.init(len tok)
-
-    # open file for reading
-    fin`.open()
-
-    # main counters for line and col; declared int because their addresses
-    # are passed to helpers taking ~int and the values land in Token.line/col
-    int line = 1, col = 1
-
-    # main loop
-    uint8 char = fin`.read()
-    /; loop (fin`.at_end == false)
-        /; if (_is_space(char) == true)
-            # skip spaces
-            char = fin`.read()
-            col++
-
-        ;; else if (char == '#')
-            # comments run to the end of the line
-            parse_comment(fin, ~char)
-
-        ;; else if (char == '\"' || char == '\'')
-            # Generate string literals
-            tok = parse_string_token(fin, ~char, ~line, ~col)
-
-        ;; else if (char !< '0' && char !> '9')
-            # handle numeric literals
-            tok = parse_numeric_token(fin, ~char, ~line, ~col)
-
-        ;; else if (is_reserved(char) == true)
-            # pushes its tokens onto out directly, so tok stays TT_INVALID
-            parse_reserved_tokens(fin, ~char, ~line, ~col, ~out)
-
-        ;; else if (char !== '\n')
-            # word tokens
-            tok = parse_word_token(fin, ~char, ~line, ~col)
-        ;/
-
-        # push the token produced above, if any
-        /; if (tok._type !== TT_INVALID)
-            out.push(~tok)
-            tok._type = TT_INVALID
-        ;/
-
-        # a helper may have stopped on a newline; emit it and move on
-        /; if (char == '\n')
-            tok = parse_nl_token(~line, ~col)
-            char = fin`.read()
-            out.push(~tok)
-            tok._type = TT_INVALID
-        ;/
-    ;/
-
-    # done with file
-    fin`.close()
-
-    return out
-;/
-
-# Release a token vector: delete every token's text buffer, then end
-# the vector itself.
-/; free_token_list (~utils.Vector vec)
-    ~Token cur
-
-    /; loop (int idx = 0; idx < vec`.count) [idx++]
-        cur = vec`.get(idx)
-        _delete(cur`.data)
-    ;/
-
-    vec`.end()
-;/
-