diff options
| -rw-r--r-- | tnslc/parse/ast.tnsl | 21 | ||||
| -rw-r--r-- | tnslc/parse/tokenizer.tnsl | 293 | ||||
| -rw-r--r-- | tnslc/tests/simple/comments.tnsl | 3 | ||||
| -rw-r--r-- | tnslc/tnslc.tnsl | 4 | ||||
| -rw-r--r-- | tnslc/utils/c_wrap_linux.tnsl | 4 | ||||
| -rw-r--r-- | tnslc/utils/file.tnsl | 2 | 
6 files changed, 233 insertions, 94 deletions
| diff --git a/tnslc/parse/ast.tnsl b/tnslc/parse/ast.tnsl index e69de29..554aac2 100644 --- a/tnslc/parse/ast.tnsl +++ b/tnslc/parse/ast.tnsl @@ -0,0 +1,21 @@ + +uint16 NTYPE_MOD = 0 +uint16 NTYPE_STRUCT = 1 +uint16 NTYPE_ID = 2 +uint16 NTYPE_BINOP = 3 +uint16 NTYPE_PREOP = 4 +uint16 NTYPE_POSTOP = 5 +uint16 NTYPE_FUNCTION = 6 + + +struct Node { +	uint16 _type, +	~uint8 data, +	utils.Vector sub +} + +/; generate_ast (~utils.File fin) [Node] +	Node out +	return out +;/ + diff --git a/tnslc/parse/tokenizer.tnsl b/tnslc/parse/tokenizer.tnsl index fcc3c5c..0df0ef8 100644 --- a/tnslc/parse/tokenizer.tnsl +++ b/tnslc/parse/tokenizer.tnsl @@ -6,7 +6,9 @@ uint TTYPE_KEYTP = 3  uint TTYPE_LITRL = 4  uint TTYPE_AUG   = 5  uint TTYPE_USRWD = 6 +uint TTYPE_COMNT = 7 +uint TTYPE_UNKNOWN = 998  uint TTYPE_ERR   = 999  struct Token { @@ -75,36 +77,31 @@ uint   MAX_MULTI  = 3  	Token out  	out.line = prev.line  	out.col = prev.col +	out._type = TTYPE_USRWD  	utils.Vector tmp  	tmp.init(1) -	 +  	uint8 ch = fin`.read() -	tmp.push(~ch) -	/; loop (bool run = true) [run == true] +	/; loop (fin`.at_end == false && is_reserved(ch) == false && is_whitespace(ch) == false) +		tmp.push(~ch)  		ch = fin`.read() -		/; if (ch == 0) -			run = false -		;; else if (is_reserved(ch) == true || is_whitespace(ch) == true) -			fin`.unread() -			run = false -		;; else -			tmp.push(~ch) -		;/  	;/ -	~uint8 str = tmp.as_cstr() -	/; if (_in_csv(KEYWORDS, str) == true) +	/; if (fin`.at_end == false) +		fin`.unread() +	;/ + +	out.data = tmp.as_cstr() +	/; if (_in_csv(KEYWORDS, out.data) == true)  		out._type = TTYPE_KEYWD -	;; else if (_in_csv(KEYTYPES, str) == true) -		out._type == TTYPE_KEYTP -	;; else if (_in_csv(LITERALS, str) == true) -		out._type == TTYPE_LITRL -	;; else if (_in_csv(MULTI_OP_W, str) == true) +	;; else if (_in_csv(KEYTYPES, out.data) == true) +		out._type = TTYPE_KEYTP +	;; else if (_in_csv(LITERALS, out.data) == true) +		out._type = TTYPE_LITRL +	;; else if (_in_csv(MULTI_OP_W) == true)  		out._type = TTYPE_AUG -	;; else -		out._type = TTYPE_USRWD  	;/  	return out @@ -112,79 +109,135 @@ uint   MAX_MULTI  = 3  /; produce_string_token (~utils.File fin, Token prev) [Token]  	Token out -	out._type = TTYPE_LITRL  	out.line = prev.line  	out.col = prev.col +	out._type = TTYPE_LITRL + +	utils.Vector tmp +	tmp.init(1) -	utils.Vector store -	store.init(1)  	uint8 delim = fin`.read() -	store.push(~delim) +	tmp.push(~delim)  	/; loop (fin`.at_end == false && delim !== 0) -		uint8 tmp = fin`.read() -		store.push(~tmp) -		/; if(tmp == '\\') -			tmp = fin`.read() -			store.push(~tmp) -		;; else if (tmp == delim) -			delim = 0 -		;; else if (tmp == '\n') +		uint8 ch = fin`.read() +		/; if (ch == '\\') +			tmp.push(~ch) +			ch = fin`.read() +		;; else if (ch == '\n')  			out.line++ +		;; else if (ch == delim) +			delim = 0 +		;/ +		 +		/; if (ch !== 0) +			tmp.push(~ch)  		;/  	;/ -	out.data = store.as_cstr() - +	out.data = tmp.as_cstr()  	return out  ;/ +/; comment_line (~utils.File fin) +	uint8 ch = fin`.read() + +	/; loop (fin`.at_end == false && ch !== '\n') +		ch = fin`.read() +	;/ + +	/; if (fin`.at_end == false) +		fin`.unread() +	;/ +;/ + +/; comment_block (~utils.File fin, ~Token out) +	uint8 ch = 1 +	/; loop (fin`.at_end == false && ch !== 0) +		ch = fin`.read() +		/; if (ch == '#') +			ch = fin`.read() +			/; if (ch == '/') +				ch = 0 +			;; else +				comment_line(fin) +			;/ +		;/ + +		/; if (ch == '\n') +			out`.line++ +		;/ +	;/ +;/ + +/; is_comment_block (~uint8 str) [bool] +	return utils.strcmp(str, "/#\0") +;/ + +/; is_multi_delim(~uint8 str) [bool] +	/; if (utils.strcmp(str, "/;\0") == true) +		return true +	;; else if (utils.strcmp(str, ";;\0") == true) +		return true +	;; else if (utils.strcmp(str, ";/\0") == true) +		return true +	;/ +	return false +;/ +  /; produce_reserved_token (~utils.File fin, Token prev) [Token]  	Token out +	out.line = prev.line +	out.col = prev.col +	out._type = TTYPE_USRWD +  	utils.Vector tmp  	tmp.init(1) -	out.line = prev.line -	out.col = prev.col +	uint8 ch = fin`.read() -	/; loop (int i = 0; i < MAX_MULTI) [i++] -		uint8 ch = fin`.read() -		/; if (is_reserved(ch) == true) -			tmp.push(~ch) -		;; else -			fin`.unread() +	/; if (ch == '#') +		tmp.push(~ch) +		out._type = TTYPE_COMNT +		out.data = tmp.as_cstr() +		comment_line(fin) +		return out +	;/ + +	tmp.push(~ch) +	/; loop (int i = 1; i < MAX_MULTI) [i++] +		ch = fin`.read() +		/; if (is_reserved(ch) == false)  			i = MAX_MULTI +			fin`.unread() +		;; else +			tmp.push(~ch)  		;/  	;/ -	 -	/; loop (bool run = true) [run == true] -		/; if (tmp.count < 2) -			run = false -			~uint8 ch = tmp.get(0) -			/; if (ch` == ';' || ch` == ',') + +	/; loop (bool run = true; run == true) +		~uint8 str = tmp.as_cstr() +		/; if (tmp.count == 1) +			/; if (str` == ',' || str` == ';')  				out._type = TTYPE_SEP -			;; else if (_str_contains(DELIMS, ch`) == true) -				out._type = TTYPE_DELIM -			;; else if (_str_contains(OP, ch`) == true) +			;; else if (_str_contains(OP, str`))  				out._type = TTYPE_AUG +			;; else if (_str_contains(DELIMS, str`)) +				out._type = TTYPE_DELIM +			;; else +				out._type = TTYPE_UNKNOWN  			;/ -		;; else if (_in_csv(MULTI_OP, tmp.as_cstr()) == true)  			run = false +		;; else if (_in_csv(MULTI_OP, str) == true)  			out._type = TTYPE_AUG -		;; else if (tmp.count == 2) -			~uint8 cha = tmp.get(0) -			~uint8 chb = tmp.get(0) -			/; if (cha` == ';' && chb` == ';') -				run = false -			;; else if (cha` == '/' && chb` == ';') -				run = false -			;; else if (cha` == ';' && chb` == '/') -				run = false -			;/ - -			/; if (run == false) -				out._type = TTYPE_DELIM -			;/ +			run = false +		;; else if (is_comment_block(str) == true) +			out._type = TTYPE_COMNT +			comment_block(fin, ~out) +			run = false +		;; else if (is_multi_delim(str) == true) +			out._type = TTYPE_DELIM +			run = false  		;; else  			tmp.pop()  			fin`.unread() @@ -192,50 +245,42 @@ uint   MAX_MULTI  = 3  	;/  	out.data = tmp.as_cstr() -  	return out  ;/  /; produce_numeric_token (~utils.File fin, Token prev) [Token]  	Token out -	out._type = TTYPE_LITRL  	out.line = prev.line  	out.col = prev.col +	out._type = TTYPE_LITRL  	utils.Vector tmp  	tmp.init(1) +	  	uint8 ch = fin`.read()  	tmp.push(~ch) - -	bool alt_base = false +	bool base = false  	/; if (ch == '0')  		ch = fin`.read() -		/; if (ch !< 'a' && ch !> 'z') -			alt_base = true -		;; else if (ch !< 'A' && ch !> 'Z') -			alt_base = true -		;; else if (is_reserved(ch) == true) -			fin`.unread() -			out.data = tmp.as_cstr() -			return out -		;; else if (ch == 0) -			out.data = tmp.as_cstr() -			return out +		/; if (is_reserved(ch) == false && is_whitespace(ch) == false && is_numeric(ch) == false) +			base = true +			tmp.push(~ch)  		;/ -		tmp.push(~ch)  	;/ -	/; loop (bool run = true) [run == true] +	bool decimal = false +	/; loop (bool run = true; run == true && fin`.at_end == false)  		ch = fin`.read() -		/; if (is_numeric(ch) == false && alt_base == false) +		/; if (decimal == false && ch == '.') +			decimal = true +			tmp.push(~ch) +		;; else if (is_reserved(ch) == true || is_whitespace(ch) == true)  			fin`.unread()  			run = false -		;; else if (is_reserved(ch) == true) +		;; else if (is_numeric(ch) == false && base == false)  			fin`.unread()  			run = false -		;; else if (ch == 0 || fin`.at_end == true) -			run = false -		;; else +		;; else if (ch !== 0)  			tmp.push(~ch)  		;/  	;/ @@ -254,7 +299,7 @@ uint   MAX_MULTI  = 3  ;/  /; is_reserved (uint8 ch) [bool] -	return _str_contains(RESERVED, ch) +	return _str_contains(RESERVED, ch) == true  ;/  /; is_numeric (uint8 ch) [bool] @@ -306,3 +351,73 @@ uint   MAX_MULTI  = 3  	return produce_next_token(fin, tmp)  ;/ +/; gen_token_list (~utils.File fin) [utils.Vector] +	utils.Vector out +	Token tmp +	out.init(len tmp) +	 +	fin`.open() +	tmp = produce_first_token(fin) +	/; loop (tmp._type !== TTYPE_ERR) +		/; if (tmp._type !== TTYPE_COMNT) +			out.push(~tmp) +			tmp = produce_next_token(fin, tmp) +		;; else +			Token com = tmp +			tmp = produce_next_token(fin, com) +			com.end() +		;/ +	;/ + +	return out +;/ + +/; print_token_type(Token t) +	 +	/; if (t._type == TTYPE_DELIM) +		_printf("DELIM\0") +	;; else if (t._type == TTYPE_SEP) +		_printf("SEP\0") +	;; else if (t._type == TTYPE_KEYWD) +		_printf("KEYWD\0") +	;; else if (t._type ==TTYPE_KEYTP) +		_printf("KEYTP\0") +	;; else if (t._type == TTYPE_LITRL) +		_printf("LITRL\0") +	;; else if (t._type == TTYPE_AUG) +		_printf("AUG\0") +	;; else if (t._type == TTYPE_USRWD) +		_printf("USRWD\0") +	;; else if (t._type == TTYPE_COMNT) +		_printf("COMNT\0") +	;; else if (t._type == TTYPE_UNKNOWN) +		_printf("UNKNOWN\0") +	;; else if (t._type == TTYPE_ERR) +		_printf("ERR\0") +	;/ + +;/ + +/; print_token_list (~utils.Vector vec) +	~Token tok +	/; loop (uint i = 0; i < vec`.count) [i++] +		tok = vec.get(i) +		_printf("Token {\0") +		_printf(tok`.data) +		_print_num(", line: %u\0", tok`.line) +		_print_num(", col: %u, type: \0", tok`.col) +		print_token_type(tok`) +		_printf("}\n\0") +	;/ +;/ + +/; end_token_list (~utils.Vector vec) +	~Token tok +	 +	/; loop (uint i = 0; i < vec`.count) [i++] +		tok = vec`.get(i) +		tok`.end() +	;/ +	vec`.end() +;/ + diff --git a/tnslc/tests/simple/comments.tnsl b/tnslc/tests/simple/comments.tnsl index dbece20..36079e4 100644 --- a/tnslc/tests/simple/comments.tnsl +++ b/tnslc/tests/simple/comments.tnsl @@ -24,7 +24,8 @@  #    It is a doc comment of a code block because it starts with '/##' instead of '/#'  #    and ends with '# ;' which ends the comment and opens a block.  #    This doc comment is on the main function -#; main /# Comment inside function declaration #/ [int /# Comment inside this list of outputs #/ ] +#/ +/; main /# Comment inside function declaration #/ [int /# Comment inside this list of outputs #/ ]      return 0 # line comment inside a function      /# Block comment inside function #/  ;/ diff --git a/tnslc/tnslc.tnsl b/tnslc/tnslc.tnsl index e95a63b..bb7992c 100644 --- a/tnslc/tnslc.tnsl +++ b/tnslc/tnslc.tnsl @@ -35,7 +35,9 @@ usage:  		fout.init(DEFAULT_FOUT)  	;/ -	compile.generate(~fin, ~fout) +	utils.Vector v = parse.gen_token_list(~fin) +	parse.print_token_list(~v) +	parse.end_token_list(~v)  	fin.end()  	fout.end() diff --git a/tnslc/utils/c_wrap_linux.tnsl b/tnslc/utils/c_wrap_linux.tnsl index 1e3155e..62c3962 100644 --- a/tnslc/utils/c_wrap_linux.tnsl +++ b/tnslc/utils/c_wrap_linux.tnsl @@ -1,5 +1,5 @@  # Must be included at the top of the file -asm "extern malloc, realloc, free, printf, open, close, read, write, fseek" +asm "extern malloc, realloc, free, printf, open, close, read, write, lseek"  {}uint8 _alert = "Alert!\n\0"  {}uint8 _dec = "%d\n\0" @@ -218,7 +218,7 @@ asm "extern malloc, realloc, free, printf, open, close, read, write, fseek"  	asm "mov rdi, r10"  	asm "mov rsi, r11"  	asm "mov rdx, 0" # standard value for SEEK_SET as per GNU libc -    asm "call fseek wrt ..plt" +    asm "call lseek wrt ..plt"  	# get return value  	asm "mov r12, rax" diff --git a/tnslc/utils/file.tnsl b/tnslc/utils/file.tnsl index 22b11f1..1d8a1e9 100644 --- a/tnslc/utils/file.tnsl +++ b/tnslc/utils/file.tnsl @@ -98,8 +98,8 @@ struct File {  			return  		;/ -		_fseek(self.handle, self.pos - 1)  		self.pos = self.pos - 1 +		_fseek(self.handle, self.pos)  		/; if (self.at_end == true)  			self.at_end = false |