From 9478e157ec2cfe4de704b3bd78b07aee8824774f Mon Sep 17 00:00:00 2001 From: Kyle Gunger Date: Mon, 18 Apr 2022 01:19:02 -0400 Subject: Add tokenizer support for strings --- tnslc/dummy.tnsl | 1 + tnslc/parse/token.tnsl | 23 ++++++++++++++++++++++- tnslc/parse/tokenizer.tnsl | 5 ++--- 3 files changed, 25 insertions(+), 4 deletions(-) (limited to 'tnslc') diff --git a/tnslc/dummy.tnsl b/tnslc/dummy.tnsl index edecab5..a2a8156 100644 --- a/tnslc/dummy.tnsl +++ b/tnslc/dummy.tnsl @@ -1,4 +1,5 @@ /; main [float] ;int i = 0 + ;{}charp c = "abc" ;return a.b ;/ diff --git a/tnslc/parse/token.tnsl b/tnslc/parse/token.tnsl index e18ade5..ce2bcdb 100644 --- a/tnslc/parse/token.tnsl +++ b/tnslc/parse/token.tnsl @@ -294,6 +294,27 @@ ;return true ;/ +/; string_closed ({}charp dat, charp c) [bool] /# Reports whether the quoted literal held in dat is already terminated. dat{0} is taken as the closing delimiter and the scan starts at index 1. Returns true at the first occurrence of that delimiter that is not preceded by an active escape; a backslash that is not itself escaped escapes exactly the next character. Returns false when len dat is less than 2 or when no unescaped delimiter is found. The parameter c is accepted but never read in this body. #/ + /; if (len dat < 2) + ;return false + ;/ + + ;charp closing = dat{0} + ;bool escaping = false + + /; loop (int i = 1; i < len dat) [i++] + /; if (dat{i} == closing && !escaping) + ;return true + ;; else if (dat{i} == '\\' && !escaping) + ;escaping = true + ;; else + ;escaping = false + ;/ + ;/ + + ;return false +;/ + /# Get the token_type value for a given string of character points @@ -301,7 +322,7 @@ /; if (len s` > 1) - /; if (is_numeric_literal(s)) + /; if (is_numeric_literal(s) || s`{0} == '"' || s`{0} == '\'') ;return TOKEN_TYPE.LITERAL ;/ diff --git a/tnslc/parse/tokenizer.tnsl b/tnslc/parse/tokenizer.tnsl index c6e1ab6..139877c 100644 --- a/tnslc/parse/tokenizer.tnsl +++ b/tnslc/parse/tokenizer.tnsl @@ -21,7 +21,8 @@ /; break_token ({}charp dat, charp c) [bool] /; if (len dat == 0) ;return false - + ;; else if (dat{0} == '"' || dat{0} == '\'') + ;return string_closed(dat, c) ;; else if (is_in_string(~RESERVED, dat{len dat - 1})) /; if (is_in_string(~RESERVED, c)) ;dat.append(c) @@ -99,8 +100,6 @@ ;out.append(ttk) ;/ - ;tnsl.io.println(tdat) - ;out = strip_and_expand(~out) ;return ~out ;/ -- cgit v1.2.3