summaryrefslogtreecommitdiff
path: root/tnslc/parse/tokenizer.tnsl
blob: 7a1f085db4ee9fe859859368c1cef3408f6a6137 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/#
	Copyright 2021 Kyle Gunger

	This file is licensed under the CDDL 1.0 (the License)
	and may only be used in accordance with the License.
	You should have received a copy of the License with this
	software/source code. If you did not, a copy can be found
	at the following URL:

	https://opensource.org/licenses/CDDL-1.0

	THIS SOFTWARE/SOURCE CODE IS PROVIDED "AS IS" WITH NO
	WARRANTY, GUARANTEE, OR CLAIM OF FITNESS FOR ANY PURPOSE
	EXPRESS OR IMPLIED
#/

# Reports whether dat is a floating point literal: it must pass
# is_numeric_literal and also contain a '.' character.
/; is_float (~{}uint8 dat) [bool]
	# Anything that is not a numeric literal can not be a float.
	/; if (!is_numeric_literal(dat))
		;return false
	;/

	# A numeric literal is a float exactly when it contains a decimal point.
	;return is_in_string(dat, '.')
;/

# Decides whether the token accumulated so far (dat) must end before the
# next input character (c) is added to it.
#
# dat - characters collected for the current token
# c   - the character that was just read and not yet appended
#
# Returns true when c can not be part of the current token, false when
# the token should keep growing (or when there is no token yet).
/; break_token ({}uint8 dat, uint8 c) [bool]
	# Nothing has been collected yet, so there is nothing to break.
	/; if (len dat == 0)
		;return false

	# The token started with a quote character: defer entirely to string_closed.
	# NOTE(review): string_closed is defined elsewhere; presumably it reports
	# whether the literal has already been terminated - confirm.
	;; else if (dat{0} == '"' || dat{0} == '\'')
		;return string_closed(dat, c)

	# The last collected character is one of the RESERVED characters.
	;; else if (is_in_string(~RESERVED, dat{len dat - 1}))

		# Two reserved characters in a row: tentatively append c and classify
		# the result. Break only if the combination comes back as DEFWORD.
		# NOTE(review): presumably DEFWORD means the combined text is not a
		# recognised multi-character reserved token - confirm in get_token_type.
		# NOTE(review): dat is declared as a by-value parameter, so the append
		# is assumed not to be visible to the caller - confirm.
		/; if (is_in_string(~RESERVED, c))
			;dat.append(c)
			;return get_token_type(~dat) == TOKEN_TYPE.DEFWORD

		# A lone '.' followed by a digit keeps going (a literal such as .5);
		# followed by anything else it breaks.
		;; else if (len dat == 1 && dat{0} == '.')
			;return !is_digit(c)

		;/

		# Reserved text followed by a non-reserved character always breaks.
		;return true

	# The token is not reserved text, but the incoming character is.
	;; else if (is_in_string(~RESERVED, c))

		# A numeric literal that has no '.' yet may take one as its decimal point.
		/; if (is_numeric_literal(~dat) && !is_float(~dat) && c == '.')
			;return false

		;/

		# Any other reserved character ends the current token.
		;return true
	;/
	
	# Neither side is reserved: only whitespace ends the token.
	;return is_whitespace(c)
;/

# Builds a new token list from dat with block comments removed.
# Every token from a "/#" token up to and including the matching "#/"
# token is dropped; all other tokens are copied over in order.
#
# dat - pointer to the token list to filter (it is only read here)
#
# Returns the filtered token list.
/; strip_and_expand (~{}Token dat) [{}Token]
	;{}Token kept = {}

	# True while the scan is between a "/#" token and its closing "#/".
	;bool in_block = false

	/; loop (int idx = 0; idx < len dat`) [idx++]
		/; if (in_block)
			# Inside a block comment: discard tokens until the closer is seen.
			/; if (string_equate(dat`{idx}.data`, "#/"))
				;in_block = false
			;/

		;; else if (string_equate(dat`{idx}.data`, "/#"))
			# The opener itself is discarded as well.
			;in_block = true

		;; else
			;kept.append(dat`{idx})
		;/
	;/

	;return kept
;/

# Reads fstr one character at a time and splits the input into tokens.
# Line comments (a lone # token up to the end of the line) are skipped
# while reading; block comments are removed afterwards by strip_and_expand.
#
# fstr - the file to read; read() is called until it returns -1
#
# Returns a pointer to the resulting token list.
/; tokenize (tnsl.io.File fstr) [~{}Token]
	;{}Token out = {}
	# Characters collected for the token currently being built.
	;{}uint8 tdat = {}
	# True while skipping the rest of a line after a lone # token.
	;bool comment = false
	;int line = 1, col = 1

	/; loop (int i = fstr.read(); i !== -1) [i = fstr.read()]
		# First decide whether the pending token ends before character i.
		/; if (break_token(tdat, i) && !comment)
			# A token that is exactly # starts a line comment and is not emitted.
			/; if (len tdat == 1 && tdat{0} == '#')
				;tdat = {}
				;comment = true
			;; else if (len tdat > 0)
				# Copy the buffer before resetting tdat so the token gets its own data.
				# NOTE(review): line and col here are the position of character i,
				# the character that ended the token, not where the token started.
				# NOTE(review): the token stores the address of tmp, a local of this
				# block - confirm that its storage outlives the block.
				;{}uint8 tmp = tdat
				;Token ttk = {get_token_type(~tmp), line, col, ~tmp}
				;out.append(ttk)
				;tdat = {}
			;/
		;/

		# Then handle character i itself: collect it, or count the newline.
		# While in a comment nothing is collected, so the character that
		# directly follows the # is dropped along with the rest of the line.
		/; if ( !is_whitespace(i) && !comment )
			;tdat.append(i)
		;; else if (i == '\n')
			;line++
			# Reset to 0 because the col++ below brings it back to 1.
			;col = 0
			# A newline ends a line comment.
			/; if (comment)
				;comment = false
			;/
		;/

		;col++
	;/

	# Emit whatever is still pending once the input is exhausted.
	/; if (len tdat > 0)
		;Token ttk = {get_token_type(~tdat), line, col, ~tdat}
		;out.append(ttk)
	;/

	# Remove block comment tokens from the finished list.
	;out = strip_and_expand(~out)
	;return ~out
;/