From 5a022193a96e72fa5144755938e6a575aba165b0 Mon Sep 17 00:00:00 2001
From: Kyle Gunger
Date: Fri, 3 Jul 2020 13:17:26 -0400
Subject: Extra Numbers

+ Add line and character numbers to tokens
+ Implement line and character numbers
~ Line and char nums start at 0 for now
~ There's this itch in my mind like something is broken
---
 src/main.go            |   2 +-
 src/tparse/parse.go    | 274 -------------------------------------------
 src/tparse/token.go    |   8 +-
 src/tparse/tokenize.go | 306 +++++++++++++++++++++++++++++++++++++++++++
 src/tparse/tree.go     |  58 ++++++++++
 src/tparse/type.go     |  20 +---
 6 files changed, 371 insertions(+), 297 deletions(-)
 delete mode 100644 src/tparse/parse.go
 create mode 100644 src/tparse/tokenize.go
 create mode 100644 src/tparse/tree.go
(limited to 'src')

diff --git a/src/main.go b/src/main.go
index ec3b14b..8edd129 100644
--- a/src/main.go
+++ b/src/main.go
@@ -18,7 +18,7 @@ func main() {
 		return
 	}
 
-	fd.WriteString(fmt.Sprint(tparse.ParseFile(*inputFile)))
+	fd.WriteString(fmt.Sprint(tparse.TokenizeFile(*inputFile)))
 
 	fd.Close()
 }
diff --git a/src/tparse/parse.go b/src/tparse/parse.go
deleted file mode 100644
index 4f25fe3..0000000
--- a/src/tparse/parse.go
+++ /dev/null
@@ -1,274 +0,0 @@
-package tparse
-
-import (
-	"bufio"
-	"io"
-	"os"
-	"strings"
-	"unicode"
-	"unicode/utf8"
-)
-
-// Read in a number (may be a float)
-func numericLiteral(r *bufio.Reader) Token {
-	decimal := false
-	run, _, err := r.ReadRune()
-
-	b := strings.Builder{}
-
-	for ; err == nil; run, _, err = r.ReadRune() {
-		if (run == '.' || run == ',') && !decimal {
-			decimal = true
-		} else if !unicode.IsNumber(run) {
-			break
-		}
-		b.WriteRune(run)
-	}
-
-	r.UnreadRune()
-
-	return Token{Type: LITERAL, Data: b.String()}
-}
-
-// Parse a string (will escape \" only in this stage)
-func stringLiteral(r *bufio.Reader) Token {
-	escape := false
-	run, _, err := r.ReadRune()
-
-	if run != '"' {
-		return Token{Type: LITERAL}
-	}
-
-	b := strings.Builder{}
-	b.WriteRune(run)
-	run, _, err = r.ReadRune()
-
-	for ; err == nil; run, _, err = r.ReadRune() {
-		b.WriteRune(run)
-		if run == '\\' && !escape {
-			escape = true
-		} else if run == '"' && !escape {
-			break
-		}
-
-	}
-
-	return Token{Type: LITERAL, Data: b.String()}
-}
-
-// Parse a character in (escape \\ or \')
-func charLiteral(r *bufio.Reader) Token {
-	escape := false
-	run, _, err := r.ReadRune()
-
-	if run != '\'' {
-		return Token{Type: LITERAL}
-	}
-
-	b := strings.Builder{}
-	b.WriteRune(run)
-	run, _, err = r.ReadRune()
-
-	for ; err == nil; run, _, err = r.ReadRune() {
-		b.WriteRune(run)
-		if run == '\\' && !escape {
-			escape = true
-		} else if run == '\'' && !escape {
-			break
-		}
-
-	}
-
-	return Token{Type: LITERAL, Data: b.String()}
-}
-
-// Split reserved runes into rune groups
-func splitResRunes(str string, max int) []Token {
-	out := []Token{}
-
-	rs := StringAsRunes(str)
-	s, e := 0, max
-
-	if max > len(rs) {
-		e = len(rs)
-	}
-
-	for e <= len(rs) && s < len(rs) {
-		if checkRuneGroup(RunesAsString(rs[s:e])) != -1 || e == s+1 {
-			tmp := RunesAsString(rs[s:e])
-			out = append(out, Token{Type: checkRuneGroup(tmp), Data: tmp})
-			s = e
-			if s+max < len(rs) {
-				e = s + max
-			} else {
-				e = len(rs)
-			}
-		} else if e != s+1 {
-			e--
-		}
-	}
-
-	return out
-}
-
-// Remove block comments
-func stripBlockComments(t []Token) []Token {
-	out := []Token{}
-	bc := false
-	for _, tok := range t {
-		if tok.Type == DELIMIT && tok.Data == "/#" {
-			bc = true
-			continue
-		}
-
-		if tok.Type == DELIMIT && tok.Data == "#/" {
-			bc = false
-			continue
-		}
-
-		if bc {
-			continue
-		}
-
-		out = append(out, tok)
-	}
-
-	return out
-}
-
-// ParseFile tries to read a file and turn it into a series of tokens
-func ParseFile(path string) []Token {
-	out := []Token{}
-
-	fd, err := os.Open(path)
-
-	if err != nil {
-		return out
-	}
-
-	read := bufio.NewReader(fd)
-
-	b := strings.Builder{}
-
-	max := maxResRunes()
-
-	for r := rune(' '); ; r, _, err = read.ReadRune() {
-		// If error in stream or EOF, break
-		if err != nil {
-			if err != io.EOF {
-				out = append(out, Token{Type: -1})
-			}
-			break
-		}
-
-		// Checking for a space
-		if unicode.IsSpace(r) {
-			if b.String() != "" {
-				out = append(out, Token{Type: checkToken(b.String()), Data: b.String()})
-				b.Reset()
-			}
-			continue
-		}
-
-		if unicode.IsNumber(r) && b.String() == "" {
-			read.UnreadRune()
-			out = append(out, numericLiteral(read))
-
-			continue
-		}
-
-		if r == '\'' {
-			if b.String() != "" {
-				out = append(out, Token{Type: checkToken(b.String()), Data: b.String()})
-				b.Reset()
-			}
-
-			read.UnreadRune()
-			out = append(out, charLiteral(read))
-
-			continue
-		}
-
-		if r == '"' {
-			if b.String() != "" {
-				out = append(out, Token{Type: checkToken(b.String()), Data: b.String()})
-				b.Reset()
-			}
-
-			read.UnreadRune()
-			out = append(out, stringLiteral(read))
-
-			continue
-		}
-
-		// Checking for a rune group
-		if checkResRune(r) != -1 {
-			if b.String() != "" {
-				out = append(out, Token{Type: checkToken(b.String()), Data: b.String()})
-				b.Reset()
-			}
-
-			for ; err == nil; r, _, err = read.ReadRune() {
-				if checkResRune(r) == -1 {
-					break
-				}
-				b.WriteRune(r)
-			}
-
-			read.UnreadRune()
-
-			rgs := splitResRunes(b.String(), max)
-
-			// Line Comments
-			for i, rg := range rgs {
-				if rg.Data == "#" {
-					rgs = rgs[:i]
-					read.ReadString('\n')
-					break
-				}
-			}
-
-			out = append(out, rgs...)
-
-			b.Reset()
-
-			continue
-		}
-
-		// Accumulate
-		b.WriteRune(r)
-	}
-
-	return stripBlockComments(out)
-}
-
-// StringAsRunes returns a string as a rune slice
-func StringAsRunes(s string) []rune {
-	out := []rune{}
-	for i, j := 0, 0; i < len(s); i += j {
-		r, w := utf8.DecodeRuneInString(s[i:])
-		out = append(out, r)
-		j = w
-	}
-	return out
-}
-
-// BytesAsRunes returns a byte slice as a rune slice
-func BytesAsRunes(b []byte) []rune {
-	out := []rune{}
-	for i, j := 0, 0; i < len(b); i += j {
-		r, w := utf8.DecodeRune(b[i:])
-		out = append(out, r)
-		j = w
-	}
-	return out
-}
-
-// RunesAsString returns a string from a slice of runes
-func RunesAsString(rs []rune) string {
-	b := strings.Builder{}
-	for _, r := range rs {
-		b.WriteRune(r)
-	}
-	return b.String()
-}
diff --git a/src/tparse/token.go b/src/tparse/token.go
index 712b746..386d52b 100644
--- a/src/tparse/token.go
+++ b/src/tparse/token.go
@@ -4,10 +4,6 @@ package tparse
 type Token struct {
 	Type int
 	Data string
-}
-
-// Container represents a container of data
-type Container struct {
-	Data []interface{}
-	Holds bool
+	Line int
+	Char int
 }
diff --git a/src/tparse/tokenize.go b/src/tparse/tokenize.go
new file mode 100644
index 0000000..79a0605
--- /dev/null
+++ b/src/tparse/tokenize.go
@@ -0,0 +1,306 @@
+package tparse
+
+import (
+	"bufio"
+	"io"
+	"os"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+// Read in a number (may be a float)
+func numericLiteral(r *bufio.Reader, line int, char *int) Token {
+	decimal := false
+	run, _, err := r.ReadRune()
+	last := *char
+	b := strings.Builder{}
+
+	for ; err == nil; run, _, err = r.ReadRune() {
+		if (run == '.' || run == ',') && !decimal {
+			decimal = true
+		} else if !unicode.IsNumber(run) {
+			break
+		}
+		*char++
+		b.WriteRune(run)
+	}
+
+	r.UnreadRune()
+
+	return Token{Type: LITERAL, Data: b.String(), Line: line, Char: last}
+}
+
+// Parse a string (will escape \" only in this stage)
+func stringLiteral(r *bufio.Reader, line, char *int) Token {
+	escape := false
+	run, _, err := r.ReadRune()
+	last := *char
+
+	if run != '"' {
+		return Token{Type: LITERAL}
+	}
+
+	b := strings.Builder{}
+	b.WriteRune(run)
+	run, _, err = r.ReadRune()
+
+	for ; err == nil; run, _, err = r.ReadRune() {
+		*char++
+		b.WriteRune(run)
+		if run == '\\' && !escape {
+			escape = true
+		} else if (run == '"' || run == '\n') && !escape {
+			break
+		} else if escape {
+			if run == '\n' {
+				*line++
+			}
+			escape = false
+		}
+	}
+
+	return Token{Type: LITERAL, Data: b.String(), Line: *line, Char: last}
+}
+
+// Parse a character in (escape \\ or \')
+func charLiteral(r *bufio.Reader, line int, char *int) Token {
+	escape := false
+	run, _, err := r.ReadRune()
+	last := *char
+
+	if run != '\'' {
+		return Token{Type: LITERAL}
+	}
+
+	b := strings.Builder{}
+	b.WriteRune(run)
+	run, _, err = r.ReadRune()
+
+	for ; err == nil; run, _, err = r.ReadRune() {
+		b.WriteRune(run)
+		*char++
+		if run == '\\' && !escape {
+			escape = true
+		} else if (run == '\'' && !escape) || run == '\n' {
+			break
+		} else if escape {
+			escape = false
+		}
+	}
+
+	return Token{Type: LITERAL, Data: b.String(), Line: line, Char: last}
+}
+
+// Split reserved runes into rune groups
+func splitResRunes(str string, max, line, start int) []Token {
+	out := []Token{}
+
+	rs := StringAsRunes(str)
+	s, e := 0, max
+
+	if max > len(rs) {
+		e = len(rs)
+	}
+
+	for e <= len(rs) && s < len(rs) {
+		if checkRuneGroup(RunesAsString(rs[s:e])) != -1 || e == s+1 {
+			tmp := RunesAsString(rs[s:e])
+			out = append(out, Token{Type: checkRuneGroup(tmp), Data: tmp, Line: line, Char: start + s})
+			s = e
+			if s+max < len(rs) {
+				e = s + max
+			} else {
+				e = len(rs)
+			}
+		} else if e != s+1 {
+			e--
+		}
+	}
+
+	return out
+}
+
+// Remove block comments
+func stripBlockComments(t []Token) []Token {
+	out := []Token{}
+	bc := false
+	for _, tok := range t {
+		if tok.Type == DELIMIT && tok.Data == "/#" {
+			bc = true
+			continue
+		} else if tok.Type == DELIMIT && tok.Data == "#/" {
+			bc = false
+			continue
+		} else if bc {
+			continue
+		}
+
+		out = append(out, tok)
+	}
+
+	return out
+}
+
+// TokenizeFile tries to read a file and turn it into a series of tokens
+func TokenizeFile(path string) []Token {
+	out := []Token{}
+
+	fd, err := os.Open(path)
+
+	if err != nil {
+		return out
+	}
+
+	read := bufio.NewReader(fd)
+
+	b := strings.Builder{}
+
+	max := maxResRunes()
+
+	ln, cn, last := int(0), int(-1), int(0)
+	sp := false
+
+	for r := rune(' '); ; r, _, err = read.ReadRune() {
+		cn++
+		// If error in stream or EOF, break
+		if err != nil {
+			if err != io.EOF {
+				out = append(out, Token{Type: -1})
+			}
+			break
+		}
+
+		// Checking for a space
+		if unicode.IsSpace(r) {
+			sp = true
+			if b.String() != "" {
+				out = append(out, Token{Type: checkToken(b.String()), Data: b.String(), Line: ln, Char: last})
+				b.Reset()
+			}
+
+			// checking for a newline
+			if r == '\n' {
+				ln++
+				cn = -1
+				last = 0
+			}
+
+			continue
+		} else if sp {
+			last = cn
+			sp = false
+		}
+
+		if unicode.IsNumber(r) && b.String() == "" {
+			read.UnreadRune()
+			out = append(out, numericLiteral(read, ln, &cn))
+			sp = true
+
+			continue
+		}
+
+		if r == '\'' {
+			if b.String() != "" {
+				out = append(out, Token{Type: checkToken(b.String()), Data: b.String(), Line: ln, Char: last})
+				b.Reset()
+			}
+
+			read.UnreadRune()
+			out = append(out, charLiteral(read, ln, &cn))
+			sp = true
+
+			continue
+		}
+
+		if r == '"' {
+			if b.String() != "" {
+				out = append(out, Token{Type: checkToken(b.String()), Data: b.String()})
+				b.Reset()
+			}
+
+			read.UnreadRune()
+			out = append(out, stringLiteral(read, &ln, &cn))
+			sp = true
+
+			continue
+		}
+
+		// Checking for a rune group
+		if checkResRune(r) != -1 {
+			if b.String() != "" {
+				out = append(out, Token{Type: checkToken(b.String()), Data: b.String(), Line: ln, Char: last})
+				b.Reset()
+			}
+			last = cn
+			for ; err == nil; r, _, err = read.ReadRune() {
+				if checkResRune(r) == -1 {
+					break
+				}
+				cn++
+				b.WriteRune(r)
+			}
+			cn--
+
+			read.UnreadRune()
+
+			rgs := splitResRunes(b.String(), max, ln, last)
+
+			// Line Comments
+			for i, rg := range rgs {
+				if rg.Data == "#" {
+					rgs = rgs[:i]
+					read.ReadString('\n')
+					ln++
+					cn = -1
+					last = 0
+					break
+				}
+			}
+
+			out = append(out, rgs...)
+
+			b.Reset()
+
+			sp = true
+
+			continue
+		}
+
+		// Accumulate
+		b.WriteRune(r)
+	}
+
+	return stripBlockComments(out)
+}
+
+// StringAsRunes returns a string as a rune slice
+func StringAsRunes(s string) []rune {
+	out := []rune{}
+	for i, j := 0, 0; i < len(s); i += j {
+		r, w := utf8.DecodeRuneInString(s[i:])
+		out = append(out, r)
+		j = w
+	}
+	return out
+}
+
+// BytesAsRunes returns a byte slice as a rune slice
+func BytesAsRunes(b []byte) []rune {
+	out := []rune{}
+	for i, j := 0, 0; i < len(b); i += j {
+		r, w := utf8.DecodeRune(b[i:])
+		out = append(out, r)
+		j = w
+	}
+	return out
+}
+
+// RunesAsString returns a string from a slice of runes
+func RunesAsString(rs []rune) string {
+	b := strings.Builder{}
+	for _, r := range rs {
+		b.WriteRune(r)
+	}
+	return b.String()
+}
diff --git a/src/tparse/tree.go b/src/tparse/tree.go
new file mode 100644
index 0000000..41896fc
--- /dev/null
+++ b/src/tparse/tree.go
@@ -0,0 +1,58 @@
+package tparse
+
+// Node represents a group of nodes or a directive
+type Node struct {
+	SubNodes []Node
+
+	Dir Directive
+}
+
+// Directive represents a block or single directive
+type Directive struct {
+	Type string
+	ID   string
+
+	Data []string
+}
+
+func handleCode(tokens *[]Token, start int) (Node, int) {
+	out := Node{}
+
+	return out, start
+}
+
+func handlePre(tokens *[]Token, start int) (Node, int) {
+	out := Node{}
+
+	return out, start
+}
+
+// CreateTree takes a series of tokens and converts them into an AST
+func CreateTree(tokens *[]Token, start int) Node {
+	out := Node{}
+	out.Dir = Directive{Type: "root", ID: "root"}
+
+	var tmp Node
+
+	for i, t := range *tokens {
+		switch t.Type {
+		case LINESEP:
+			if t.Data == ";" {
+				tmp, i = handleCode(tokens, i)
+			} else if t.Data == ":" {
+				tmp, i = handlePre(tokens, i)
+			}
+			break
+		case DELIMIT:
+			if t.Data == "/;" {
+				tmp, i = handleCode(tokens, i)
+			} else if t.Data == "/:" {
+				tmp, i = handlePre(tokens, i)
+			}
+			break
+		}
+		out.SubNodes = append(out.SubNodes, tmp)
+	}
+
+	return out
+}
diff --git a/src/tparse/type.go b/src/tparse/type.go
index 867f14c..dc0df10 100644
--- a/src/tparse/type.go
+++ b/src/tparse/type.go
@@ -33,10 +33,12 @@ const DEFWORD = 8
 var RESWORD = map[string]int{
 	"import": PREWORD,
 
+	"bool": KEYTYPE,
+	"byte": KEYTYPE,
+	"char": KEYTYPE,
+	"int": KEYTYPE,
-	"bool": KEYTYPE,
 	"float": KEYTYPE,
-	"char": KEYTYPE,
 
 	"struct": KEYWORD,
 	"type": KEYWORD,
@@ -146,9 +148,6 @@ func checkResRune(r rune) int {
 
 // RESRUNES Reserved sets of reserved runes which mean something
 var RESRUNES = map[string]int{
-	// Pre-processor block
-	"/:": DELIMIT,
-	":/": DELIMIT,
 	// Code block
 	"/;": DELIMIT,
 	";/": DELIMIT,
@@ -156,18 +155,7 @@ var RESRUNES = map[string]int{
 	"/#": DELIMIT,
 	"#/": DELIMIT,
-	// Quick chain
-	"::": DELIMIT,
-	":;": DELIMIT,
-	":#": DELIMIT,
-	";;": DELIMIT,
-	";:": DELIMIT,
-	";#": DELIMIT,
-
-	"##": DELIMIT,
-	"#:": DELIMIT,
-	"#;": DELIMIT,
 
 	// Boolean equ
 	"==": AUGMENT,
-- 
cgit v1.2.3
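
For reference, a minimal sketch (not part of the patch) of how a caller might consume the new Line and Char fields this commit adds to Token. It assumes the tparse import resolves the same way it does for src/main.go above, and "example.tnsl" is only a placeholder input path; per the commit notes, the recorded positions are 0-based for now.

package main

import (
	"fmt"

	"tparse" // assumed import path, mirroring the import used by src/main.go
)

func main() {
	// TokenizeFile returns each token along with the line/char where it was seen.
	for _, tok := range tparse.TokenizeFile("example.tnsl") {
		fmt.Printf("line %d, char %d: type %d, data %q\n", tok.Line, tok.Char, tok.Type, tok.Data)
	}
}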