# All single reserved characters

{}uint8 MULTI_PARENS = "/;:#"
{}uint8 PARENS = "()[]{}"
{}uint8 SEPS = "\n;:,"
{}uint8 RESERVED = "`~!%^&*()-+=[]{}|;:/?<>.,"
{}uint8 AUGMENTS = "=~!<>&|^+-*/%`."
{}uint8 WHITESPACE = " \r\n\t"

# All lists of keywords are comma delimited because the compiler does not yet support arrays of strings

{}uint8 CSV_AUGMENTS = "++,--,==,!==,&&,||,^^,<==,>==,!>,!<,~=,`=,%=,^=,&=,*=,!=,|=,/=,<<,>>,!&,!|,!^,len,is"
{}uint8 CSV_KEYWORDS = "if,else,loop,continue,break,return,method,struct,enum,interface,export,module,const,static,volatile,raw,extends,override,asm"
{}uint8 CSV_KEYTYPES = "uint8,uint16,uint32,uint64,uint,int8,int16,int32,int64,int,float32,float64,float,comp32,comp64,comp,vect,bool,type,void"

# Types of tokens
enum TOKEN_TYPE [uint] {
    SEPARATOR = 0,
    DELIMITER = 1,
    AUGMENT = 2,
    KEYTYPE = 3,
    KEYWORD = 4,
    LITERAL = 5,
    DEFWORD = 6
}

# Token structure represents a single token in the program
struct Token {
    uint _type, line, column,
    ~uint8 data
}

# Shortcut methods on the token struct
/; method Token

    # Initialize the data buffer
    /; start
        self.data = _alloc(1)
        self.data{0} = 0
    ;/

    # Append a character to the end of the token
    /; append (uint8 ch)
        int ln = cstr_len(self.data)
        self.data = _realloc(self.data, ln + 2)
        self.data{ln} = ch
        self.data{ln + 1} = 0
    ;/

    # Remove the last character from this token
    /; pop
        int ln = cstr_len(self.data)
        self.data{ln - 1} = 0
    ;/

    # Copy another token to this token
    /; copy (Token other)
        self._type = other._type
        self.line = other.line
        self.column = other.column
        self.data = _alloc(cstr_len(other.data) + 1)
        cstr_copy(other.data, self.data)
    ;/

    # Delete the memory associated with this token
    /; _del
        _realloc(self.data, 0)
    ;/

    # Length of the string that this token encodes
    /; _len [int]
        return cstr_len(self.data)
    ;/
;/

{}uint8 tkn_st = "{ \0", tkn_nd = " }\n\0", tkn_sp = " \0"

# Write a human-readable representation of the token to the given file
/; print_token (Token tok, ~void file_out)
    write_to_file(file_out, ~tkn_st{0})
    write_to_file(file_out, print_tok_type(tok._type))
    write_to_file(file_out, ~tkn_sp{0})
    write_to_file(file_out, tok.data)
    write_to_file(file_out, ~tkn_nd{0})
;/

# Returns true if the character is whitespace
/; is_whitespace (uint8 c) [bool]
    return contains_char(~WHITESPACE, c)
;/

# Returns true if the character is reserved
/; is_reserved (uint8 c) [bool]
    return contains_char(~RESERVED, c)
;/

# Returns true if the token is a valid reserved token
/; tok_reserved (Token tok) [bool]
    /; if (tok._len() == 1)
        return is_reserved(tok.data{0})
    ;; else if (tok._len() == 2)
        /; if (contains_char(~MULTI_PARENS, tok.data{0}) && contains_char(~MULTI_PARENS, tok.data{1}))
            return true
        ;/
    ;/

    return in_csv(~CSV_AUGMENTS, tok.data) !< 0
;/

# True if the token is a valid number (integer or float)
/; is_numeric_literal (Token tok) [bool]
    /; if (tok._len() < 1)
        return false
    ;; else if (tok.data{0} < '0' || tok.data{0} > '9')
        return false
    ;/

    bool non_dec = false
    /; if (tok._len() > 1 && tok.data{0} == '0')
        non_dec = tok.data{1} > '9'
        # TODO: non_dec not impl
    ;/

    bool dec_seen = false
    /; loop (int i = 0; i < tok._len()) [i++]
        /; if (dec_seen == false && tok.data{i} == '.')
            dec_seen = true
        ;; else if (tok.data{i} < '0' || tok.data{i} > '9')
            return false
        ;/
    ;/

    return true
;/

# Classify a token as one of the TOKEN_TYPE values
/; get_tok_type (Token tok) [uint]
    /; if (tok_reserved(tok) == true)
        /; if (tok._len() > 1)
            /; if (contains_char(~MULTI_PARENS, tok.data{0}) && contains_char(~MULTI_PARENS, tok.data{1}))
                return TOKEN_TYPE.DELIMITER
            ;/
            return TOKEN_TYPE.AUGMENT
        ;; else if (contains_char(~PARENS, tok.data{0}) == true)
            return TOKEN_TYPE.DELIMITER
        ;; else if (contains_char(~SEPS, tok.data{0}) == true)
            return TOKEN_TYPE.SEPARATOR
        ;; else if (contains_char(~AUGMENTS, tok.data{0}) == true)
            return TOKEN_TYPE.AUGMENT
        ;/
    ;; else if (in_csv(~CSV_KEYWORDS, tok.data) !< 0)
        return TOKEN_TYPE.KEYWORD
    ;; else if (in_csv(~CSV_KEYTYPES, tok.data) !< 0)
        return TOKEN_TYPE.KEYTYPE
    ;; else if (is_numeric_literal(tok) == true)
        return TOKEN_TYPE.LITERAL
    ;/

    return TOKEN_TYPE.DEFWORD
;/
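# Decide whether appending the character c to the in-progress token tok should
# instead start a new token. A break is reported when:
#   - c is whitespace and the token with c appended would not read as a literal,
#   - c is reserved but would not extend a valid operator/delimiter token, or the
#     current token starts with a reserved character and c is not reserved,
#   - a numeric literal would turn into a DEFWORD (e.g. "123abc" splits after "123").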
/; break_token (~Token tok, uint8 c) [bool]
    uint type_before = get_tok_type(tok`)
    tok`.append(c)
    uint type_after = get_tok_type(tok`)
    tok`.pop()

    bool a = is_whitespace(c) && type_after !== TOKEN_TYPE.LITERAL

    bool b = false
    /; if (is_reserved(c) == true)
        b = type_after == TOKEN_TYPE.DEFWORD
    ;; else if (tok`._len() > 0)
        b = is_reserved(tok`.data{0})
    ;/

    bool c = type_before == TOKEN_TYPE.LITERAL && type_after == TOKEN_TYPE.DEFWORD

    return a || b || c
;/

# Consume characters up to and including the end of the line (or end of file)
/; handle_comment (~void file_in)
    uint8 buf = 0
    int read_count = 0

    /; loop _read_byte(file_in, ~buf, ~read_count)
        /; if (buf == '\n' || read_count == 0)
            break
        ;/
        read_count = 0
    ;/
;/

# Read a character or string literal (delimited by first) into tmp,
# honoring backslash escapes and tracking line/column as it goes
/; handle_str (~void file_in, Token tmp, ~int line, column, uint8 first) [Token]
    uint8 buf = first
    int read_count = 0

    tmp._type = TOKEN_TYPE.LITERAL
    tmp.append(buf)

    read_count = 0
    /; loop _read_byte(file_in, ~buf, ~read_count)
        /; if (read_count == 0)
            break
        ;/

        /; if (buf == '\\')
            tmp.append(buf)
            read_count = 0
            _read_byte(file_in, ~buf, ~read_count)
            column`++
            tmp.append(buf)
        ;; else if (buf == first)
            tmp.append(buf)
            break
        ;; else
            tmp.append(buf)
        ;/

        /; if (buf == '\n')
            line`++
            column` = 1
        ;; else
            column`++
        ;/

        read_count = 0
    ;/

    return tmp
;/

{}uint8 w_tkn_gen = "%d Tokens generated from file.\n\0"
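# Read the whole file and split it into a Vector of Token structs, recording the
# line and column where each token starts. For example, the input
#     int x = 5
# produces four tokens, which print_token would render as:
#     { KEYTYPE int } { DEFWORD x } { AUGMENT = } { LITERAL 5 }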
/; tokenize_file (~void file_in) [Vector]
    # This vector is going to store all of our tokens as we generate them
    Vector out_vect
    # The size of a token struct is 3 uint + pointer = 4*8 = 32 bytes
    out_vect.start(32)

    Token tmp
    tmp.start()
    tmp.line = 1
    tmp.column = 1

    uint8 buf = 0
    int read_count = 0
    int line = 1
    int column = 1

    # Read loop
    /; loop [column++] _read_byte(file_in, ~buf, ~read_count)
        /; if (read_count == 0)
            break
        ;/

        /; if (buf == '#')
            # Handle comment
            handle_comment(file_in)
            line++
        ;; else if (buf == '\'' || buf == '"')
            # Don't rope the last token into this
            /; if (tmp._len() > 0)
                tmp._type = get_tok_type(tmp)
                out_vect.push(~tmp)
                tmp.start()
            ;/

            # Handle char/string literal
            tmp = handle_str(file_in, tmp, ~line, ~column, buf)
            out_vect.push(~tmp)
            tmp.start()
            tmp.line = line
            tmp.column = column
        ;; else if (break_token(~tmp, buf) == true)
            # Handle token break
            /; if (tmp._len() > 0)
                tmp._type = get_tok_type(tmp)
                out_vect.push(~tmp)
                tmp.start()
            ;/

            tmp.line = line
            tmp.column = column
            /; if (is_whitespace(buf) == false)
                tmp.append(buf)
            ;/
        ;; else if (is_whitespace(buf) == false)
            # Add non-whitespace
            tmp.append(buf)
        ;/

        /; if (buf == '\n')
            line++
            column = 0
        ;/

        read_count = 0
    ;/

    /; if (tmp._len() > 0)
        tmp._type = get_tok_type(tmp)
        out_vect.push(~tmp)
    ;/

    _print_num(~w_tkn_gen{0}, out_vect._len())

    return out_vect
;/

{}uint8 w_SEP = "SEPARATOR\0"
{}uint8 w_DEL = "DELIMITER\0"
{}uint8 w_AUG = "AUGMENT\0"
{}uint8 w_KTP = "KEYTYPE\0"
{}uint8 w_KWD = "KEYWORD\0"
{}uint8 w_LIT = "LITERAL\0"
{}uint8 w_DEF = "DEFWORD\0"

/; print_tok_type (uint tt) [~uint8]
    ~uint8 ptr = ~w_DEF{0}

    /; if (tt == TOKEN_TYPE.SEPARATOR)
        ptr = ~w_SEP{0}
    ;; else if (tt == TOKEN_TYPE.DELIMITER)
        ptr = ~w_DEL{0}
    ;; else if (tt == TOKEN_TYPE.AUGMENT)
        ptr = ~w_AUG{0}
    ;; else if (tt == TOKEN_TYPE.KEYTYPE)
        ptr = ~w_KTP{0}
    ;; else if (tt == TOKEN_TYPE.KEYWORD)
        ptr = ~w_KWD{0}
    ;; else if (tt == TOKEN_TYPE.LITERAL)
        ptr = ~w_LIT{0}
    ;; else if (tt == TOKEN_TYPE.DEFWORD)
        ptr = ~w_DEF{0}
    ;/

    return ptr
;/
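# Hedged sketch (not part of the tokenizer itself): one way a caller could probe
# break_token by hand. Token, break_token, and TOKEN_TYPE come from this file;
# the function name example_breaks and its local names are illustrative only.
/; example_breaks [bool]
    Token t
    t.start()
    t.append('x')

    # "x" is a DEFWORD and '=' is reserved, so break_token reports that a new
    # token should start before the '=' is appended
    bool split = break_token(~t, '=')

    t._del()
    return split
;/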