From 5d688b4da97da2c2f684940147478f12d1f2baba Mon Sep 17 00:00:00 2001
From: Kyle Gunger <kgunger12@gmail.com>
Date: Fri, 19 Jul 2024 03:21:39 -0400
Subject: switch tokenization scheme

---
 tnslc/compile/ast.tnsl        | 247 -----------------------
 tnslc/compile/compile.tnsl    |   8 +-
 tnslc/compile/error.tnsl      |  15 --
 tnslc/compile/function.tnsl   |  10 -
 tnslc/compile/generate.tnsl   |   2 +
 tnslc/compile/generator.tnsl  |  11 -
 tnslc/compile/module.tnsl     |  63 ------
 tnslc/compile/tokenizer.tnsl  | 455 ------------------------------------------
 tnslc/compile/variable.tnsl   |   9 -
 tnslc/parse/ast.tnsl          |   0
 tnslc/parse/parse.tnsl        |   4 +
 tnslc/parse/tokenizer.tnsl    |  90 +++++++++
 tnslc/tnslc.tnsl              |   3 +-
 tnslc/utils/algo.tnsl         |   2 +-
 tnslc/utils/c_wrap_linux.tnsl |  26 ++-
 tnslc/utils/file.tnsl         |  22 ++
 16 files changed, 147 insertions(+), 820 deletions(-)
 delete mode 100644 tnslc/compile/ast.tnsl
 delete mode 100644 tnslc/compile/error.tnsl
 delete mode 100644 tnslc/compile/function.tnsl
 create mode 100644 tnslc/compile/generate.tnsl
 delete mode 100644 tnslc/compile/generator.tnsl
 delete mode 100644 tnslc/compile/module.tnsl
 delete mode 100644 tnslc/compile/tokenizer.tnsl
 delete mode 100644 tnslc/compile/variable.tnsl
 create mode 100644 tnslc/parse/ast.tnsl
 create mode 100644 tnslc/parse/parse.tnsl
 create mode 100644 tnslc/parse/tokenizer.tnsl

(limited to 'tnslc')

diff --git a/tnslc/compile/ast.tnsl b/tnslc/compile/ast.tnsl
deleted file mode 100644
index 6efee60..0000000
--- a/tnslc/compile/ast.tnsl
+++ /dev/null
@@ -1,247 +0,0 @@
-
-int NT_MODULE = 0
-int NT_MOD_EX = 1
-int NT_BLOCK = 2
-int NT_FUNC = 3
-int NT_PARAM = 4
-int NT_RESULT = 5
-int NT_DATA = 6
-int NT_TYPE = 7
-int NT_STRUCT = 8
-
-struct Node {
-	int _type,
-	~uint8 data,
-	utils.Vector sub
-}
-
-/; method Node
-	/; init (int typ, ~uint8 dat)
-		self._type = typ
-		self.data = dat
-		Node sub
-		self.sub.init(len sub)
-	;/
-
-	/; end
-		_delete(self.data)
-
-		~Node n
-		/; loop (int i = 0; i < self.sub.count) [i++]
-			n = self.sub.get(i)
-			n`.end()
-		;/
-		self.sub.end()
-	;/
-
-;/
-
-/; check_via_next (~utils.Iterator it, ~uint8 chk) [bool]
-	it`.next()
-
-	/; if (it`.at_end() == true)
-		it`.prev()
-		return false
-	;/
-	
-	~Token cur = it`.get()
-	/; if (utils.strcmp(cur`.data, cur) == true)
-		return true
-	;/
-
-	it`.prev()
-	return false
-;/
-
-/; check_and_advance (~utils.Iterator it, ~uint8 chk) [bool]
-	/; if (it`.at_end() == true)
-		return false
-	;/
-
-	~Token cur = it`.get()
-	/; if (utils.strcmp(cur`.data, cur) == true)
-		it`.next()
-		return true
-	;/
-
-	return false
-;/
-
-/; build_struct (~utils.Iterator it, ~Node mod, ~utils.File fin)
-;/
-
-/; build_module (~utils.Iterator it, ~Node mod, ~utils.File fin)
-
-	int NT = NT_MODULE
-	~Token t = it`.get()
-	/; if (utils.strcmp(t`.data, "export\0"))
-		NT = NT_MOD_EX
-		it`.next()
-	;/
-
-	Node sub
-	it`.next()
-	t = it`.get()
-
-	/; if (t`._type !== TT_DEFWORD)
-		_printf("Error in module decl")
-		return
-	;/
-	
-	sub.init(NT, utils.strcpy(t`.data))
-	bool run = true
-
-	/; loop (it`.at_end() == false && run == true)
-		t = it`.get()
-		
-		/; if (utils.strcmp(t`.data, ";/\0") || utils.strcmp(t`.data, ";;\0"))
-			run = false
-
-		;; else if (utils.strcmp(t`.data, "/;\0"))
-			_printf("Block detected!\n\0")
-			build_block(it, ~sub, fin)
-
-		;; else if (utils.strcmp(t`.data, "struct\0") == true)
-			_printf("Struct detected!\n\0")
-			build_struct(it, ~sub, fin)
-
-		;; else if (utils.strcmp(t`.data, ":\0") == true)
-			_printf("Preproc detected!\n\0")
-			build_preproc(it, ~sub, fin)
-
-		;; else if (at_defn(it) == true)
-			_printf("Defn detected!\n\0")
-			build_vardef(it, ~sub, fin)
-
-		;; else
-			# _printf("Error detected!\n\0")
-			# TODO: ERROR
-		;/
-
-		/; if (run == true)
-			it`.next()
-		;/
-	;/
-
-	mod`.sub.push(~sub)
-;/
-
-/; build_method (~utils.Iterator it, ~Node mod, ~utils.File fin)
-;/
-
-/; build_function (~utils.Iterator it, ~Node mod, ~utils.File fin)
-;/
-
-/; build_block (~utils.Iterator it, ~Node mod, ~utils.File fin)
-	it`.next()
-
-	/; if (it`.at_end() == true)
-		return
-	;/
-
-	~Token cur
-	bool run = true
-
-	/; loop (it`.at_end() == false && run == true)
-		cur = it`.get()
-
-		/; if (utils.strcmp(cur`.data, "module\0") || utils.strcmp(cur`.data, "export\0"))
-			build_module(it, mod, fin)
-
-		;; else if (utils.strcmp(cur`.data, "method\0") == true)
-			build_method(it, mod, fin)
-
-		;; else if (cur`._type == TT_DEFWORD)
-			build_function(it, mod, fin)
-
-		;; else
-			# TODO: ERROR
-
-		;/
-
-		cur = it`.get()
-
-		/; if (utils.strcmp(cur`.data, ";/\0") == true)
-			run = false
-		;; else if (utils.strcmp(cur`.data, ";;\0") == true)
-			it`.next()
-		;; else
-			run = false
-			# report error
-		;/
-	;/
-
-;/
-
-/; build_preproc (~utils.Iterator it, ~Node mod, ~utils.File fin)
-	it`.next()
-	/; if (it`.at_end() == true)
-		return
-	;/
-
-	~Token cur = it`.get()
-	/; if (utils.strcmp(cur`.data, "import\0") == true)
-		# get file path
-		it`.next()
-		cur = it`.get()
-
-		# gen new file struct
-		~uint8 frel = utils.unquote_str(cur`.data)
-		_printf("\nReading file: \0")
-		_printf(frel)
-		_printf("\n\0")
-		utils.File fnew = fin`.relative(frel)
-		
-		# file import
-		build_file(~fnew, mod)
-
-		# cleanup
-		_delete(frel)
-		fnew.end()
-	;; else
-		# unknown preproc
-		return
-	;/
-;/
-
-/; build_vardef (~utils.Iterator it, ~Node mod, ~utils.File fin)
-;/
-
-/; at_defn (~utils.Iterator it) [bool]
-	return false
-;/
-
-~uint8 TOKEN_COUNT = "Token count: %d\n\0"
-
-/; build_file (~utils.File fin, ~Node mod)
-	utils.Vector tokens = tokenize(fin)
-	_print_num(TOKEN_COUNT, tokens.count)
-	
-	utils.Iterator tokit
-	tokit.init(~tokens)
-
-	/; loop (tokit.at_end() == false)
-		~Token t = tokit.get()
-		/; if (utils.strcmp(t`.data, "/;\0") || utils.strcmp(t`.data, ";;\0"))
-			_printf("Block detected!\n\0")
-			build_block(~tokit, mod, fin)
-		;; else if (utils.strcmp(t`.data, "struct\0") == true)
-			_printf("Struct detected!\n\0")
-			build_struct(~tokit, mod, fin)
-		;; else if (utils.strcmp(t`.data, ":\0") == true)
-			_printf("Preproc detected!\n\0")
-			build_preproc(~tokit, mod, fin)
-		;; else if (at_defn(~tokit) == true)
-			_printf("Defn detected!\n\0")
-			build_vardef(~tokit, mod, fin)
-		;; else
-			# _printf("Error detected!\n\0")
-			# TODO: ERROR
-		;/
-
-		tokit.next()
-	;/
-
-	free_token_list(~tokens)
-;/
-
diff --git a/tnslc/compile/compile.tnsl b/tnslc/compile/compile.tnsl
index ed13e00..cda1f62 100644
--- a/tnslc/compile/compile.tnsl
+++ b/tnslc/compile/compile.tnsl
@@ -1,9 +1,3 @@
 /; module compile
-	:import "variable.tnsl"
-	:import "function.tnsl"
-	:import "module.tnsl"
-	:import "tokenizer.tnsl"
-	:import "ast.tnsl"
-	:import "generator.tnsl"
-	:import "error.tnsl"
+	:import "generate.tnsl"
 ;/
diff --git a/tnslc/compile/error.tnsl b/tnslc/compile/error.tnsl
deleted file mode 100644
index 255aec1..0000000
--- a/tnslc/compile/error.tnsl
+++ /dev/null
@@ -1,15 +0,0 @@
-
-~uint8 ERR_NUM = ":%d\0"
-~uint8 TOK_PRNT = " \"%s\": \0"
-
-/; report_error (utils.File file, Token token, ~uint8 message)
-	~uint s = file.path.to_cstr('/')
-	_printf(s)
-	_delete(s)
-	_print_num(ERR_NUM, token.line)
-	_print_num(ERR_NUM, token.col)
-	_print_num(TOK_PRNT, token.data)
-	_printf(message)
-	_printf(newline)
-;/
-
diff --git a/tnslc/compile/function.tnsl b/tnslc/compile/function.tnsl
deleted file mode 100644
index a2d764b..0000000
--- a/tnslc/compile/function.tnsl
+++ /dev/null
@@ -1,10 +0,0 @@
-struct Function {
-	~uint8 name
-}
-
-/; method Function
-	
-	/; end
-	;/
-;/
-
diff --git a/tnslc/compile/generate.tnsl b/tnslc/compile/generate.tnsl
new file mode 100644
index 0000000..4cee1e3
--- /dev/null
+++ b/tnslc/compile/generate.tnsl
@@ -0,0 +1,2 @@
+/; generate (~utils.File fin, fout) # placeholder: the body is intentionally empty for now
+;/
diff --git a/tnslc/compile/generator.tnsl b/tnslc/compile/generator.tnsl
deleted file mode 100644
index 10c75f8..0000000
--- a/tnslc/compile/generator.tnsl
+++ /dev/null
@@ -1,11 +0,0 @@
-
-
-/; generate (~utils.File fin, fout)
-
-	Node root
-	root.init(NT_MODULE, NULL)
-	build_file(fin, ~root)
-	root.end()
-
-;/
-
diff --git a/tnslc/compile/module.tnsl b/tnslc/compile/module.tnsl
deleted file mode 100644
index 41890b3..0000000
--- a/tnslc/compile/module.tnsl
+++ /dev/null
@@ -1,63 +0,0 @@
-struct Module {
-	~uint8 name,
-	~Module parent,
-	utils.Vector vars, funcs, submods,
-	bool exported
-}
-
-/; method Module
-	/; init (~uint8 name, ~Module parent, bool exported)
-		self.parent = parent
-		self.exported = exported
-		self.name = utils.strclone(name)
-
-		Variable v
-		self.vars.init(len v)
-		Function f
-		self.funcs.init(len f)
-		Module m
-		self.submods.init(len m)
-	;/
-
-	# Assumes that variable will be freed by this module
-	/; add_var (~Variable v)
-		self.vars.push(v)
-	;/
-
-	# Assumes that function will be freed by this module
-	/; add_func (~Function f)
-		self.funcs.push(f)
-	;/
-
-	# Assumes that submod will be freed by this module
-	/; add_sub (~Module s)
-		self.submods.push(s)
-	;/
-
-	# Free all contained substructures
-	/; end
-		_delete(self.name)
-		
-		~Variable v
-		/; loop (int i = 0; i < self.vars.count) [i++]
-			v = self.vars.get(i)
-			v`.end()
-		;/
-
-		~Function f
-		/; loop (int i = 0; i < self.funcs.count) [i++]
-			f = self.funcs.get(i)
-			f`.end()
-		;/
-
-		~Module s
-		/; loop (int i = 0; i < self.submods.count) [i++]
-			s = self.submods.get(i)
-			s`.end()
-		;/
-
-		self.vars.end()
-		self.funcs.end()
-		self.submods.end()
-	;/
-;/
diff --git a/tnslc/compile/tokenizer.tnsl b/tnslc/compile/tokenizer.tnsl
deleted file mode 100644
index 30fc8e8..0000000
--- a/tnslc/compile/tokenizer.tnsl
+++ /dev/null
@@ -1,455 +0,0 @@
-bool HAD_ERROR = false
-
-struct Token {
-	~uint8 data,
-	int
-		_type,
-		line,
-		col,
-	int
-		closing # only has meaning for delimiters
-}
-
-/; method Token
-	/; eq (Token tok) [bool]
-		return utils.strcmp(self.data, tok.data)
-	;/
-
-	/; eq_str(~uint8 str) [bool]
-		return utils.strcmp(self.data, str)
-	;/
-
-	/; sprint [~uint8]
-		utils.Vector out
-		out.init(1)
-
-		~uint8 tmp
-		
-		out.push_char('{')
-
-		out.push_cstr(self.data)
-		
-		out.push_char(',')
-		out.push_char(' ')
-		
-		tmp = utils.int_to_str(self._type)
-		out.push_cstr(tmp)
-		_delete(tmp)
-
-		out.push_char(',')
-		out.push_char(' ')
-		
-		tmp = utils.int_to_str(self.line)
-		out.push_cstr(tmp)
-		_delete(tmp)
-
-		out.push_char(',')
-		out.push_char(' ')
-		
-		tmp = utils.int_to_str(self.col)
-		out.push_cstr(tmp)
-		_delete(tmp)
-		
-		out.push_char('}')
-
-		return out.as_cstr()
-	;/
-;/
-
-/; _is_space(uint8 char) [bool]
-	/; if (char == '\t' || char == '\r' || char == ' ')
-		return true
-	;/
-	return false
-;/
-
-/; _in_csv (~uint8 csv, ~uint8 str) [bool]
-	int along = 0
-
-	/; loop (csv` !== 0) [csv++]
-		/; if (csv` == ',')
-			/; if (along !< 0 && str{along} == 0)
-				return true
-			;/
-			along = 0
-		;; else if (along !< 0 && str{along} == csv`)
-			along++
-		;; else
-			along = 0
-			along--
-		;/
-	;/
-
-	return along !< 0 && str{along} == 0
-;/
-
-/; _str_contains (~uint8 str, uint8 ch) [bool]
-	/; loop (str` !== 0) [str++]
-		/; if (str` == ch)
-			return true
-		;/
-	;/
-	return false
-;/
-
-~uint8 KEYWORDS = "module,export,asm,if,else,loop,label,goto,continue,break,return,import,as,using,struct,method,interface,enum,implements,operator,is\0"
-~uint8 KEYTYPES = "uint8,uint16,uint32,uint64,uint,int8,int16,int32,int64,int,float32,float64,float,bool,vect,void\0"
-~uint8 LITERALS = "false,true\0"
-
-~uint8 RESERVED = "~`!@#$%^&*()[]{}-+=\"\'\\|;:/?.>,<\0"
-
-~uint8 OPS = "`~!%^&|*-=+./><\0"
-~uint8 MULTI_OPS = "==,&&,||,^^,!==,!&&,!||,!^^,!<,!>,<<,>>,!&,!|,!^,++,--,>==,<==,len,!=\0"
-
-~uint8 DELIMS = "()[]{}\0"
-~uint8 MULTI_DELIMS = ";:#\0"
-
-int TT_DEFWORD = 0
-int TT_KEYWORD = 1
-int TT_KEYTYPE = 2
-int TT_LITERAL = 3
-int TT_AUGMENT = 4
-int TT_DELIMIT = 5
-int TT_SPLITTR = 6
-int TT_INVALID = 7
-
-/; opposite_delim (uint8 c) [uint8]
-	/; loop (int i = 0; DELIMS{i} !== 0) [i++]
-		/; if (DELIMS{i} == c)
-			/; if (i % 2 == 1)
-				return DELIMS{i - 1}
-			;; else
-				return DELIMS{i + 1}
-			;/
-		;/
-	;/
-	return c
-;/
-
-/; is_delim (~uint8 str) [bool]
-	int l = utils.strlen(str)
-
-	/; if (l == 1 && _str_contains(DELIMS, str`) == true)
-		return true
-	;; else if (l == 2)
-		/; if (_str_contains(MULTI_DELIMS, str{0}) == true)
-			return (str{0} == str{1} && str{0} !== '#') || str{1} == '/'
-		;; else if (_str_contains(MULTI_DELIMS, str{1}) == true)
-			return str{0} == '/'
-		;/
-	;/
-	return false
-;/
-
-/; token_type (~uint8 str) [int]
-	int l = utils.strlen(str)
-
-	/; if (l < 1)
-		return TT_INVALID
-	;/
-
-	/; if (is_delim(str) == true)
-		return TT_DELIMIT
-	;; else if (l == 1 && is_reserved(str{0}) == true)
-		/; if (_str_contains(OPS, str{0}) == true)
-			return TT_AUGMENT
-		;; else if (str` == ',' || str` == ';' || str` == ':')
-			return TT_SPLITTR
-		;/
-	;; else if (_in_csv(MULTI_OPS, str) == true)
-		return TT_AUGMENT
-	;; else if (_in_csv(KEYTYPES, str) == true)
-		return TT_KEYTYPE
-	;; else if (_in_csv(KEYWORDS, str) == true)
-		return TT_KEYWORD
-	;; else if (_in_csv(LITERALS, str) == true)
-		return TT_LITERAL
-	;/
-
-	return TT_DEFWORD
-;/
-
-/; is_reserved (uint8 char) [bool]
-	return _str_contains(RESERVED, char)
-;/
-
-/; parse_nl_token (~int line, col) [Token]
-	Token out
-	out.line = line`
-	out.col = col`
-	out._type = TT_SPLITTR
-
-	out.data = _alloc(2)
-	out.data{0} = '\n'
-	out.data{1} = 0
-
-	col` = 1
-	line` = line` + 1
-
-	return out
-;/
-
-/; parse_comment (~utils.File fin, ~uint8 char)
-	/; loop (fin`.at_end == false && char` !== '\n')
-		char` = fin`.read()
-	;/
-;/
-
-/; parse_string_token(~utils.File fin, ~uint8 char, ~int line, col) [Token]
-	utils.Vector str
-	str.init(1)
-	str.push_char(char`)
-
-	uint8 first = char`
-
-	Token out
-	out.line = line`
-	out.col = col`
-	out._type = TT_LITERAL
-
-	char` = fin`.read()
-	col`++
-	/; loop (char` !== first && fin`.at_end == false)
-		/; if (char` == '\\')
-			str.push_char(char`)
-			char` = fin`.read()
-			col`++
-			/; if (fin`.at_end == false)
-				/; if (char` == '\n')
-					line`++
-					col` = 0
-				;/
-				str.push_char(char`)
-				char` = fin`.read()
-				col`++
-			;/
-		;; else
-			/; if (char` == '\n')
-				line`++
-				col` = 0
-			;/
-			str.push_char(char`)
-			char` = fin`.read()
-			col`++
-		;/
-	;/
-	
-	/; if (fin`.at_end == false)
-		char` = fin`.read()
-	;/
-
-	str.push_char(first)
-
-	out.data = str.as_cstr()
-
-	return out
-;/
-
-/; in_num_range (uint8 char) [bool]
-	bool dec = char !< '0' && char !> '9'
-	
-	bool hex = char !< 'a' && char !> 'f'
-	bool HEX = char !< 'A' && char !> 'F'
-	hex = hex || HEX
-
-	return dec || hex || char == '.'
-;/
-
-/; parse_numeric_token (~utils.File fin, ~uint8 char, ~int line, col) [Token]
-	Token out
-	out.line = line`
-	out.col = col`
-	out._type = TT_LITERAL
-
-	utils.Vector num
-	num.init(1)
-	num.push_char(char`)
-
-	char` = fin`.read()
-	col`++
-	
-	bool dec = false, ok = true
-
-	/; loop (fin`.at_end == false && ok == true)
-		/; if (char` == '.' && dec == true)
-			ok = false
-		;; else if (char` == '.')
-			dec = true
-		;/
-
-		/; if (ok == true && in_num_range(char`) == true)
-			num.push_char(char`)
-			char` = fin`.read()
-			col`++
-		;; else
-			ok = false
-		;/
-	;/
-	
-	out.data = num.as_cstr()
-
-	return out
-;/
-
-/; parse_word_token (~utils.File fin, ~uint8 char, ~int line, col) [Token]
-	Token out
-	out.line = line`
-	out.col = col`
-
-	utils.Vector str
-	str.init(1)
-
-	bool ok = true
-
-	/; loop (fin`.at_end == false && ok == true)
-		str.push_char(char`)
-
-		char` = fin`.read()
-		col`++
-
-		/; if (char` == '\n' || _is_space(char`) == true || is_reserved(char`) == true)
-			ok = false
-		;/
-	;/
-
-	out.data = str.as_cstr()
-	out._type = token_type(out.data)
-	return out
-;/
-
-~uint8 ERROR_RESERVED = "unexpected reserved token in file\0"
-
-/; parse_reserved_tokens (~utils.File fin, ~uint8 char, ~int line, col, ~utils.Vector out)
-	Token tmp
-	tmp.line = line`
-	tmp.col = col`
-
-	utils.Vector res
-	res.init(1)
-
-	bool ok = true
-
-	/; loop (fin`.at_end == false && ok == true)
-
-		res.push_char(char`)
-		int after = token_type(res.as_cstr())
-
-		/; if (after == TT_DEFWORD)
-			bool res_unexpected = true
-			/; if (res.count > 1)
-				res.pop()
-				res_unexpected = false
-			;/
-
-			tmp.data = res.as_cstr()
-			tmp._type = token_type(tmp.data)
-			
-			/; if (res_unexpected == true)
-				HAD_ERROR = true
-				report_error(fin`, tmp, ERROR_RESERVED)
-			;/
-			
-			out`.push(~tmp)
-
-			res.init(1)
-			res.push_char(char`)
-			tmp.col = col`
-		;/
-
-		char` = fin`.read()
-		col`++
-
-		/; if (is_reserved(char`) == false || char` == '\"' || char` == '\'')
-			ok = false
-		;/
-	;/
-
-	/; if (res.count > 0)
-		tmp.data = res.as_cstr()
-		tmp._type = token_type(tmp.data)
-		out`.push(~tmp)
-	;; else 
-		res.end()
-	;/
-;/
-
-~uint8 RES_LOL = "Reserved %c\n\0"
-~uint8 PUSH = "Pushing token %s\n\0"
-
-/; tokenize (~utils.File fin) [utils.Vector]
-	# create a tmp token
-	Token tok
-	tok._type = TT_INVALID
-
-	utils.Vector out, delims
-
-	# init vectors
-
-	out.init(len tok)
-	delims.init(8) # A stack of delimiters
-	
-	# open file for reading
-	fin`.open()
-	
-	# main counters for line and col
-	uint line = 1, col = 1
-
-	# main loop
-	uint8 char = fin`.read()
-	/; loop (fin`.at_end == false)
-		/; if (_is_space(char) == true)
-			# skip spaces
-			char = fin`.read()
-			col++
-
-		;; else if (char == '#')
-			parse_comment(fin, ~char)
-
-		;; else if (char == '\"' || char == '\'')
-			# Generate string literals
-			tok = parse_string_token(fin, ~char, ~line, ~col)
-
-		;; else if (char !< '0' && char !> '9')
-			# handle numeric literals
-			tok = parse_numeric_token(fin, ~char, ~line, ~col)
-
-		;; else if (is_reserved(char) == true)
-			parse_reserved_tokens(fin, ~char, ~line, ~col, ~out)
-
-		;; else if (char !== '\n')
-			# word tokens
-			tok = parse_word_token(fin, ~char, ~line, ~col)
-		;/
-
-		/; if (tok._type !== TT_INVALID)
-			out.push(~tok)
-			tok._type = TT_INVALID
-		;/
-
-		/; if (char == '\n')
-			tok = parse_nl_token(~line, ~col)
-			char = fin`.read()
-			out.push(~tok)
-			tok._type = TT_INVALID
-		;/
-	;/
-
-	delims.end()
-	
-	# done with file
-	fin`.close()
-
-	return out
-;/
-
-/; free_token_list (~utils.Vector vec)
-	~Token t
-	
-	/; loop (int i = 0; i < vec`.count) [i++]
-		t = vec`.get(i)
-		_delete(t`.data)
-	;/
-
-	vec`.end()
-;/
-
diff --git a/tnslc/compile/variable.tnsl b/tnslc/compile/variable.tnsl
deleted file mode 100644
index af6f6c1..0000000
--- a/tnslc/compile/variable.tnsl
+++ /dev/null
@@ -1,9 +0,0 @@
-struct Variable {
-	~uint8 name
-}
-
-/; method Variable
-
-	/; end
-	;/
-;/
diff --git a/tnslc/parse/ast.tnsl b/tnslc/parse/ast.tnsl
new file mode 100644
index 0000000..e69de29
diff --git a/tnslc/parse/parse.tnsl b/tnslc/parse/parse.tnsl
new file mode 100644
index 0000000..c225cf9
--- /dev/null
+++ b/tnslc/parse/parse.tnsl
@@ -0,0 +1,4 @@
+/; module parse
+	:import "tokenizer.tnsl"
+	:import "ast.tnsl"
+;/
diff --git a/tnslc/parse/tokenizer.tnsl b/tnslc/parse/tokenizer.tnsl
new file mode 100644
index 0000000..801d8fa
--- /dev/null
+++ b/tnslc/parse/tokenizer.tnsl
@@ -0,0 +1,90 @@
+# Token categories; the producers below store one of these in Token._type.
+uint TTYPE_DELIM = 0
+uint TTYPE_SEP   = 1
+uint TTYPE_KEYWD = 2
+uint TTYPE_KEYTP = 3
+uint TTYPE_LITRL = 4 # used for both string and int tokens
+uint TTYPE_AUG   = 5
+uint TTYPE_USRWD = 6
+
+uint TTYPE_ERR   = 999 # returned by produce_next_token when no producer matches
+
+struct Token {
+	uint _type, # one of the TTYPE_ constants
+	~uint8 data, # token text; measured with utils.strlen, so presumably a \0-terminated cstr
+	uint line, col # position; the producers copy it from the previous token
+}
+
+~uint8 KEYWORDS = "import,module,export,struct,method,operator,if,else,loop,continue,break,return\0" # comma-separated list, explicitly \0-terminated
+~uint8 KEYTYPES = "uint8,uint16,uint32,uint64,uint,int8,int16,int32,int64,int,float32,float64,float,bool,void,vect,type\0" # comma-separated list, explicitly \0-terminated
+
+/; produce_word_token (~utils.File fin, Token prev) [Token]
+	Token out # stub: no field of out is assigned before it is returned
+	return out
+;/
+
+/; produce_int_token (~utils.File fin, Token prev) [Token]
+	Token out # literal token placed at the position carried by prev
+	out._type = TTYPE_LITRL
+	out.line = prev.line
+	out.col = prev.col
+	return out # NOTE(review): out.data is never assigned and nothing is read from fin yet
+;/
+
+/; produce_string_token (~utils.File fin, Token prev) [Token]
+	Token out # literal token placed at the position carried by prev
+	out._type = TTYPE_LITRL
+	out.line = prev.line
+	out.col = prev.col
+
+	utils.Vector store
+	store.init(1) # presumably the element size: one byte per stored character
+	uint8 delim = fin`.read() # the quote character that produce_next_token pushed back
+	store.push(~delim)
+
+	out.data = store.as_cstr() # NOTE(review): only the opening quote is captured so far
+
+	return out
+;/
+
+/; produce_reserved_token (~utils.File fin, Token prev) [Token]
+	Token out
+	# stub: out is returned with no field assigned and fin is not read
+	return out
+;/
+
+/; is_whitespace (uint8 ch) [bool]
+	# True for the ASCII control codes 9 through 13 (tab, LF, VT, FF, CR)
+	# and for the space character; false for every other byte.
+	bool ctrl = ch > 8 && ch < 14
+	bool blank = ch == ' '
+
+	return ctrl || blank
+;/
+
+/; is_reserved [bool]
+	return false # stub: takes no argument and always reports false
+;/
+
+# Produce the token that follows prev; only string literals are recognised so far.
+/; produce_next_token (~utils.File fin, Token prev) [Token]
+	/; if (prev.data !== NULL) # a token without text has no width to step over
+		prev.col = prev.col + utils.strlen(prev.data)
+	;/
+	uint8 first = fin`.read()
+	/; loop (is_whitespace(first) == true) # NOTE(review): skipped newlines do not advance line
+		first = fin`.read()
+	;/
+	fin`.unread() # push the first non-whitespace byte back for the producer
+	/; if (first == '\'' || first == '\"')
+		return produce_string_token(fin, prev)
+	;/
+
+	Token out
+	out.data = NULL # no text, so the guard above stays safe if this token is fed back as prev
+	out.line = prev.line
+	out.col = prev.col
+	out._type = TTYPE_ERR
+	return out
+;/
+
diff --git a/tnslc/tnslc.tnsl b/tnslc/tnslc.tnsl
index bc3fbb9..e95a63b 100644
--- a/tnslc/tnslc.tnsl
+++ b/tnslc/tnslc.tnsl
@@ -1,10 +1,11 @@
 :import "utils/utils.tnsl"
+:import "parse/parse.tnsl"
 :import "compile/compile.tnsl"
 
 ~uint8 DEFAULT_FOUT = "out.asm\0"
 
 ~uint8 USAGE = "
-TNSLC v0.6.0 (C) 2024 CircleShift Softworks
+TNSLC v0.6.0 (C) 2024 CircleShift (MPL 2.0)
 
 usage:
 	tnslc (file in) [file out]
diff --git a/tnslc/utils/algo.tnsl b/tnslc/utils/algo.tnsl
index a08c773..73cfb7f 100644
--- a/tnslc/utils/algo.tnsl
+++ b/tnslc/utils/algo.tnsl
@@ -194,7 +194,7 @@
 	return str
 ;/
 
-/; strclone(~uint8 cstr) [~uint8]
+/; strcpy(~uint8 cstr) [~uint8]
 	Vector out
 	out.from_cstr(cstr)
 	return out.as_cstr()
diff --git a/tnslc/utils/c_wrap_linux.tnsl b/tnslc/utils/c_wrap_linux.tnsl
index 814ec1c..1e3155e 100644
--- a/tnslc/utils/c_wrap_linux.tnsl
+++ b/tnslc/utils/c_wrap_linux.tnsl
@@ -1,5 +1,5 @@
 # Must be included at the top of the file
-asm "extern malloc, realloc, free, printf, open, close, read, write"
+asm "extern malloc, realloc, free, printf, open, close, read, write, fseek, lseek"
 
 {}uint8 _alert = "Alert!\n\0"
 {}uint8 _dec = "%d\n\0"
@@ -202,6 +202,30 @@ asm "extern malloc, realloc, free, printf, open, close, read, write"
     return out
 ;/
 
+# Seek the descriptor held in handle to the absolute byte offset pos (SEEK_SET).
+# Handles here belong to the open/read/write descriptor API, so lseek is called; fseek needs a FILE pointer.
+/; _fseek (~void handle, uint pos) [int]
+	int out
+	# align stack
+    asm "mov rax, rsp"
+    asm "xor rdx, rdx"
+    asm "mov rcx, 16"
+    asm "div rcx"
+    asm "sub rsp, rdx"
+    # add buffer zone to stack
+	asm "sub rsp, 128"
+
+	# Call c func: lseek(fd, offset, whence)
+	asm "mov rdi, r10" # handle (assumed to arrive in r10 - confirm against the other wrappers)
+	asm "mov rsi, r11" # pos (assumed to arrive in r11 - confirm against the other wrappers)
+	asm "mov rdx, 0" # standard value for SEEK_SET as per GNU libc
+    asm "call lseek wrt ..plt"
+
+	# get return value (lseek yields the new offset, or -1 on failure)
+	asm "mov r12, rax"
+	return out
+;/
+
 /; _write_byte (~void handle, ~uint8 byte) [int]
     int out
 
diff --git a/tnslc/utils/file.tnsl b/tnslc/utils/file.tnsl
index 978f31b..22b11f1 100644
--- a/tnslc/utils/file.tnsl
+++ b/tnslc/utils/file.tnsl
@@ -1,6 +1,7 @@
 struct File {
 	Artifact path,
 	~void handle,
+	uint pos,
 	bool at_end
 }
 
@@ -14,6 +15,7 @@ struct File {
 		self.path.split_cstr(str, '/')
 		self.handle = NULL
 		self.at_end = false
+		self.pos = 0
 	;/
 
 	/; relative (~uint8 path) [File]
@@ -75,15 +77,35 @@ struct File {
 	;/
 
 	/; read [uint8]
+		/; if (self.at_end == true) # once EOF was seen, keep returning 0 without touching the handle
+			return 0
+		;/
+
 		uint8 out
 		int bytes = _read_byte(self.handle, ~out)
+		self.pos = self.pos + 1 # counted before the EOF check, so a read that hits EOF also advances pos
+		
 		/; if (bytes == 0)
 			self.at_end = true
 			return 0
 		;/
+		
 		return out
 	;/
 
+	/; unread
+		# Nothing has been consumed yet, so there is nothing to push back.
+		/; if (self.pos == 0)
+			return
+		;/
+
+		# Step the tracked position back one byte and seek the handle to it.
+		self.pos = self.pos - 1
+		_fseek(self.handle, self.pos)
+
+		self.at_end = false
+	;/
+
 	/; write (uint8 byte)
 		int written = _write_byte(self.handle, ~byte)
 		/; if (written == 0)
-- 
cgit v1.2.3