path: root/tnslc
author     Kyle Gunger <kgunger12@gmail.com>   2024-03-31 03:09:06 -0400
committer  Kyle Gunger <kgunger12@gmail.com>   2024-03-31 03:09:06 -0400
commit     acc33ffeb8f5eae5e6bb805f1cb409841f0aad75
tree       5801ecf64408ba281362310b325c80ca1f028b20 /tnslc
parent     d90f254a65e7443e7d6ee7d4a3e89df21bdd4e8c
Tentative tokenizer
Diffstat (limited to 'tnslc')
-rw-r--r--  tnslc/compile/compile.tnsl    |    1
-rw-r--r--  tnslc/compile/error.tnsl      |   14
-rw-r--r--  tnslc/compile/generator.tnsl  |   27
-rw-r--r--  tnslc/compile/tokenizer.tnsl  |  388
-rw-r--r--  tnslc/test.tnsl               |    3
-rw-r--r--  tnslc/tnslc.tnsl              |    2
6 files changed, 419 insertions, 16 deletions
diff --git a/tnslc/compile/compile.tnsl b/tnslc/compile/compile.tnsl
index 00fe081..eca0247 100644
--- a/tnslc/compile/compile.tnsl
+++ b/tnslc/compile/compile.tnsl
@@ -3,4 +3,5 @@
:import "tokenizer.tnsl"
:import "lexer.tnsl"
:import "generator.tnsl"
+ :import "error.tnsl"
;/
diff --git a/tnslc/compile/error.tnsl b/tnslc/compile/error.tnsl
new file mode 100644
index 0000000..7857075
--- /dev/null
+++ b/tnslc/compile/error.tnsl
@@ -0,0 +1,14 @@
+
+~uint8 ERR_NUM = ":%d\0"
+~uint8 TOK_PRNT = " \"%s\": \0"
+
+/; report_error (utils.File file, Token token, ~uint8 message)
+ ~uint s = file.path.to_cstr('/')
+ _printf(s)
+ _print_num(ERR_NUM, token.line)
+ _print_num(ERR_NUM, token.col)
+ _print_num(TOK_PRNT, token.data)
+ _printf(message)
+ _printf(newline)
+;/
+
diff --git a/tnslc/compile/generator.tnsl b/tnslc/compile/generator.tnsl
index 28a834d..eedc552 100644
--- a/tnslc/compile/generator.tnsl
+++ b/tnslc/compile/generator.tnsl
@@ -1,16 +1,25 @@
-/; generate (utils.File fin, fout)
- fin.open()
- fout.create()
+~uint8 TOKEN_COUNT = "Token count: %d\n\0"
- uint8 buf = fin.read()
- /; loop (fin.at_end == false && fout.at_end == false)
- fout.write(buf)
- buf = fin.read()
+/; generate (~utils.File fin, fout)
+
+ utils.Vector tokens = tokenize(fin)
+
+ _print_num(TOKEN_COUNT, tokens.count)
+
+ fout`.create()
+
+ /; loop (int i = 0; i < tokens.count) [i++]
+ ~Token tok = tokens.get(i)
+ ~uint8 buf = tok`.sprint()
+ fout`.write_cstr(buf)
+ fout`.write('\n')
+ _delete(buf)
;/
- fin.close()
- fout.close()
+ fout`.close()
+
+ free_token_list(~tokens)
;/
diff --git a/tnslc/compile/tokenizer.tnsl b/tnslc/compile/tokenizer.tnsl
index 722a5a0..e528e34 100644
--- a/tnslc/compile/tokenizer.tnsl
+++ b/tnslc/compile/tokenizer.tnsl
@@ -1,9 +1,13 @@
+bool HAD_ERROR = false
+
struct Token {
~uint8 data,
int
_type,
line,
- col
+ col,
+ int
+ closing # only has meaning for delimiters
}
/; method Token
@@ -14,10 +18,46 @@ struct Token {
/; eq_str(~uint8 str) [bool]
return utils.strcmp(self.data, str)
;/
+
+ /; sprint [~uint8]
+ utils.Vector out
+ out.init(1)
+
+ ~uint8 tmp
+
+ out.push_char('{')
+
+ out.push_cstr(self.data)
+
+ out.push_char(',')
+ out.push_char(' ')
+
+ tmp = utils.int_to_str(self._type)
+ out.push_cstr(tmp)
+ _delete(tmp)
+
+ out.push_char(',')
+ out.push_char(' ')
+
+ tmp = utils.int_to_str(self.line)
+ out.push_cstr(tmp)
+ _delete(tmp)
+
+ out.push_char(',')
+ out.push_char(' ')
+
+ tmp = utils.int_to_str(self.col)
+ out.push_cstr(tmp)
+ _delete(tmp)
+
+ out.push_char('}')
+
+ return out.as_cstr()
+ ;/
;/
/; _is_space(uint8 char) [bool]
- /; if (char == '\t' || char == '\n' || char == '\r' || char == ' ')
+ /; if (char == '\t' || char == '\r' || char == ' ')
return true
;/
return false
@@ -56,20 +96,356 @@ struct Token {
~uint8 KEYTYPES = "uint8,uint16,uint32,uint64,uint,int8,int16,int32,int64,int,float32,float64,float,bool,vect,void\0"
~uint8 LITERALS = "false,true\0"
-~uint8 RESERVED = "~`!@#$%^&*()[]{}+_=\"\'\\|;:/?.>,<\0"
+~uint8 RESERVED = "~`!@#$%^&*()[]{}+=\"\'\\|;:/?.>,<\0"
~uint8 OPS = "`~!%^&*-=+./><\0"
-~uint8 MULTI_OPS = "==,&&,||,^^,!==,!&&,!||,!^^,!<,!>,<<,>>,!&,!|,!^,++,--,>==,<==,len\0"
+~uint8 MULTI_OPS = "==,&&,||,^^,!=,!&&,!||,!^^,!<,!>,<<,>>,!&,!|,!^,++,--,>=,<=,len\0"
~uint8 DELIMS = "()[]{}\0"
~uint8 MULTI_DELIMS = ";:#\0"
+int TT_DEFWORD = 0
+int TT_KEYWORD = 1
+int TT_KEYTYPE = 2
+int TT_LITERAL = 3
+int TT_AUGMENT = 4
+int TT_DELIMIT = 5
+int TT_SPLITTR = 6
+int TT_INVALID = 7
+
+/; opposite_delim (uint8 c) [uint8]
+ /; loop (int i = 0; DELIMS{i} !== 0) [i++]
+ /; if (DELIMS{i} == c)
+ /; if (i % 2 == 1)
+ return DELIMS{i - 1}
+ ;; else
+ return DELIMS{i + 1}
+ ;/
+ ;/
+ ;/
+ return c
+;/
+
+/; is_delim (~uint8 str) [bool]
+ int l = utils.strlen(str)
+
+ /; if (l == 1 && _str_contains(DELIMS, str`) == true)
+ return true
+ ;; else if (l == 2)
+ /; if (_str_contains(MULTI_DELIMS, str{0}) == true)
+ return (str{0} == str{1} && str{0} !== '#') || str{1} == '/'
+ ;; else if (_str_contains(MULTI_DELIMS, str{1}) == true)
+ return str{0} == '/'
+ ;/
+ ;/
+ return false
+;/
+
+/; token_type (~uint8 str) [int]
+ int l = utils.strlen(str)
+
+ /; if (l < 1)
+ return TT_INVALID
+ ;/
+
+ /; if (is_delim(str) == true)
+ return TT_DELIMIT
+ ;; else if (l == 1 && is_reserved(str{0}) == true)
+ /; if (_str_contains(OPS, str{0}) == true)
+ return TT_AUGMENT
+ ;; else if (str` == ',' || str` == ';' || str` == ':')
+ return TT_SPLITTR
+ ;/
+ ;; else if (_in_csv(MULTI_OPS, str) == true)
+ return TT_AUGMENT
+ ;; else if (_in_csv(KEYTYPES, str) == true)
+ return TT_KEYTYPE
+ ;; else if (_in_csv(KEYWORDS, str) == true)
+ return TT_KEYWORD
+ ;; else if (_in_csv(LITERALS, str) == true)
+ return TT_LITERAL
+ ;/
+
+ return TT_DEFWORD
+;/
+
+/; is_reserved (uint8 char) [bool]
+ return _str_contains(RESERVED, char)
+;/
+
+/; parse_nl_token (~int line, col) [Token]
+ Token out
+ out.line = line`
+ out.col = col`
+ out._type = TT_SPLITTR
+
+ out.data = _alloc(2)
+ out.data{0} = '\n'
+ out.data{1} = 0
+
+ col` = 1
+ line` = line` + 1
+
+ return out
+;/
+
+/; parse_comment (~utils.File fin, ~uint8 char)
+ /; loop (fin`.at_end == false && char` !== '\n')
+ char` = fin`.read()
+ ;/
+;/
+
+/; parse_string_token(~utils.File fin, ~uint8 char, ~int line, col) [Token]
+ utils.Vector str
+ str.init(1)
+ str.push_char(char`)
+
+ uint8 first = char`
+
+ Token out
+ out.line = line`
+ out.col = col`
+ out._type = TT_LITERAL
+
+ char` = fin`.read()
+ col`++
+ /; loop (char` !== first && fin`.at_end == false)
+ /; if (char` == '\\')
+ str.push_char(char`)
+ char` = fin`.read()
+ col`++
+ /; if (fin`.at_end == false)
+ /; if (char` == '\n')
+ line`++
+ col` = 0
+ ;/
+ str.push_char(char`)
+ char` = fin`.read()
+ col`++
+ ;/
+ ;; else
+ /; if (char` == '\n')
+ line`++
+ col` = 0
+ ;/
+ str.push_char(char`)
+ char` = fin`.read()
+ col`++
+ ;/
+ ;/
+
+ /; if (fin`.at_end == false)
+ char` = fin`.read()
+ ;/
+
+ str.push_char(first)
+
+ out.data = str.as_cstr()
+
+ return out
+;/
+
+/; in_num_range (uint8 char) [bool]
+ bool dec = char !< '0' && char !> '9'
+
+ bool hex = char !< 'a' && char !> 'f'
+ bool HEX = char !< 'A' && char !> 'F'
+ hex = hex || HEX
+
+ return dec || hex || char == '.'
+;/
+
+/; parse_numeric_token (~utils.File fin, ~uint8 char, ~int line, col) [Token]
+ Token out
+ out.line = line`
+ out.col = col`
+ out._type = TT_LITERAL
+
+ utils.Vector num
+ num.init(1)
+ num.push_char(char`)
-/; tokenize(utils.File fin) [utils.Vector]
+ char` = fin`.read()
+ col`++
+
+ bool dec = false, ok = true
+
+ /; loop (fin`.at_end == false && ok == true)
+ /; if (char` == '.' && dec == true)
+ ok = false
+ ;; else if (char` == '.')
+ dec = true
+ ;/
+
+ /; if (ok == true && in_num_range(char`) == true)
+ num.push_char(char`)
+ char` = fin`.read()
+ col`++
+ ;; else
+ ok = false
+ ;/
+ ;/
+
+ out.data = num.as_cstr()
+
+ return out
+;/
+
+/; parse_word_token (~utils.File fin, ~uint8 char, ~int line, col) [Token]
+ Token out
+ out.line = line`
+ out.col = col`
+
+ utils.Vector str
+ str.init(1)
+
+ bool ok = true
+
+ /; loop (fin`.at_end == false && ok == true)
+ str.push_char(char`)
+
+ char` = fin`.read()
+ col`++
+
+ /; if (char` == '\n' || _is_space(char`) == true || is_reserved(char`) == true)
+ ok = false
+ ;/
+ ;/
+
+ out.data = str.as_cstr()
+ out._type = token_type(out.data)
+ return out
+;/
+
+/; parse_reserved_tokens (~utils.File fin, ~uint8 char, ~int line, col, ~utils.Vector out)
+ Token tmp
+ tmp.line = line`
+ tmp.col = col`
+
+ utils.Vector res
+ res.init(1)
+
+ bool ok = true
+
+ /; loop (fin`.at_end == false && ok == true)
+
+ res.push_char(char`)
+ int after = token_type(res.as_cstr())
+
+ /; if (after == TT_DEFWORD)
+ res.pop()
+ tmp.data = res.as_cstr()
+ tmp._type = token_type(tmp.data)
+ out`.push(~tmp)
+
+ res.init(1)
+ res.push_char(char`)
+ tmp.col = col`
+ ;/
+
+ char` = fin`.read()
+ col`++
+
+ /; if (is_reserved(char`) == false || char` == '\"' || char` == '\'')
+ ok = false
+ ;/
+ ;/
+
+ /; if (res.count > 0)
+ tmp.data = res.as_cstr()
+ tmp._type = token_type(tmp.data)
+ out`.push(~tmp)
+ ;; else
+ res.end()
+ ;/
+;/
+
+~uint8 RES_LOL = "Reserved %c\n\0"
+~uint8 PUSH = "Pushing token %s\n\0"
+
+/; tokenize (~utils.File fin) [utils.Vector]
+ # create a tmp token
Token tok
+ tok._type = TT_INVALID
+
+ utils.Vector out, delims, str
+
+ # init vectors
- utils.Vector out
out.init(len tok)
+ delims.init(8) # A stack of delimiters
+ str.init(1)
+
+ # open file for reading
+ fin`.open()
+
+ # main counters for line and col
+ uint line = 1, col = 1
+
+ # main loop
+ uint8 char = fin`.read()
+ /; loop (fin`.at_end == false)
+ /; if (_is_space(char) == true)
+ # skip spaces
+ char = fin`.read()
+ col++
+
+ ;; else if (char == '#')
+ parse_comment(fin, ~char)
+
+ ;; else if (char == '\"' || char == '\'')
+ # Generate string literals
+ tok = parse_string_token(fin, ~char, ~line, ~col)
+
+ ;; else if (char !< '0' && char !> '9')
+ # handle numeric literals
+ tok = parse_numeric_token(fin, ~char, ~line, ~col)
+
+ ;; else if (is_reserved(char) == true)
+ parse_reserved_tokens(fin, ~char, ~line, ~col, ~out)
+
+ ;; else if (char != '\n')
+ # word tokens
+ tok = parse_word_token(fin, ~char, ~line, ~col)
+ ;/
+
+ /; if (tok._type !== TT_INVALID)
+ out.push(~tok)
+ tok._type = TT_INVALID
+ ;/
+
+ /; if (char == '\n')
+ tok = parse_nl_token(~line, ~col)
+ char = fin`.read()
+ out.push(~tok)
+ tok._type = TT_INVALID
+ ;/
+ ;/
+
+ /; if (str.count > 0)
+ tok.data = str.as_cstr()
+ tok._type = token_type(tok.data)
+ out.push(~tok)
+ ;; else
+ str.end()
+ ;/
+
+ delims.end()
+
+ # done with file
+ fin`.close()
return out
;/
+
+/; free_token_list (~utils.Vector vec)
+ ~Token t
+
+ /; loop (int i = 0; i < vec`.count) [i++]
+ t = vec`.get(i)
+ _delete(t`.data)
+ ;/
+
+ vec`.end()
+;/
+
diff --git a/tnslc/test.tnsl b/tnslc/test.tnsl
new file mode 100644
index 0000000..7a51951
--- /dev/null
+++ b/tnslc/test.tnsl
@@ -0,0 +1,3 @@
+/; main [int]
+;/
+
diff --git a/tnslc/tnslc.tnsl b/tnslc/tnslc.tnsl
index 84fbdb5..ce68133 100644
--- a/tnslc/tnslc.tnsl
+++ b/tnslc/tnslc.tnsl
@@ -35,7 +35,7 @@ usage:
fout.init(DEFAULT_FOUT)
;/
- compile.generate(fin, fout)
+ compile.generate(~fin, ~fout)
fin.end()
fout.end()
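
A minimal usage sketch of the new tokenizer entry points, mirroring the calls generate() and free_token_list() make in the diff above. This is illustrative only and not part of the commit: the standalone main, the literal input path, and the assumption that utils.File.init takes a path string (as tnslc.tnsl suggests) are hypothetical.

# Hypothetical driver (not in this commit): tokenize a file and dump each token.
/; main [int]
	utils.File fin
	fin.init("test.tnsl\0")    # assumed: init takes a path string

	# tokenize() opens and closes the file itself
	utils.Vector tokens = tokenize(~fin)
	_print_num(TOKEN_COUNT, tokens.count)

	/; loop (int i = 0; i < tokens.count) [i++]
		~Token tok = tokens.get(i)
		~uint8 buf = tok`.sprint()
		_printf(buf)
		_printf(newline)
		_delete(buf)
	;/

	free_token_list(~tokens)
	fin.end()
	return 0
;/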