From 0428c27adf1a145f295ec477d498596ff1a75736 Mon Sep 17 00:00:00 2001 From: Kyle Gunger Date: Fri, 6 Nov 2020 02:21:08 -0500 Subject: Start work on AST generation --- src/main.go | 23 ++++++- src/tparse/preproc.go | 1 - src/tparse/resolver.go | 23 +++++++ src/tparse/token.go | 23 +++++++ src/tparse/tokenize.go | 77 +++++++++++++++++----- src/tparse/tree.go | 176 ++++++++++++++++++++++++++++++++++--------------- src/tparse/type.go | 110 ++++++++++++++++++++++++------- 7 files changed, 337 insertions(+), 96 deletions(-) delete mode 100644 src/tparse/preproc.go create mode 100644 src/tparse/resolver.go (limited to 'src') diff --git a/src/main.go b/src/main.go index 8edd129..c74d0cb 100644 --- a/src/main.go +++ b/src/main.go @@ -1,3 +1,19 @@ +/* + Copyright 2020 Kyle Gunger + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + package main import "fmt" @@ -7,7 +23,7 @@ import "os" func main() { inputFile := flag.String("in", "", "The file to parse") - outputFile := flag.String("out", "out.tnp", "The file to store the parse in") + outputFile := flag.String("out", "out.tnt", "The file to store the node tree") flag.Parse() @@ -18,7 +34,10 @@ func main() { return } - fd.WriteString(fmt.Sprint(tparse.TokenizeFile(*inputFile))) + tokens := tparse.TokenizeFile(*inputFile) + tree := tparse.MakeTree(&tokens, *inputFile) + + fd.WriteString(fmt.Sprint(tree)) fd.Close() } diff --git a/src/tparse/preproc.go b/src/tparse/preproc.go deleted file mode 100644 index f591b44..0000000 --- a/src/tparse/preproc.go +++ /dev/null @@ -1 +0,0 @@ -package tparse diff --git a/src/tparse/resolver.go b/src/tparse/resolver.go new file mode 100644 index 0000000..8408a1c --- /dev/null +++ b/src/tparse/resolver.go @@ -0,0 +1,23 @@ +/* + Copyright 2020 Kyle Gunger + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package tparse + +//TODO: Everything + +/* + This file is suppost to contain code to include other files when asked, and represents the most important part of the pre-processor +*/ diff --git a/src/tparse/token.go b/src/tparse/token.go index 386d52b..f923af4 100644 --- a/src/tparse/token.go +++ b/src/tparse/token.go @@ -1,3 +1,19 @@ +/* + Copyright 2020 Kyle Gunger + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + package tparse // Token represents a token in a program @@ -7,3 +23,10 @@ type Token struct { Line int Char int } + +// Node represents a node in an AST +type Node struct { + Parent *Node + Data Token + Sub []Node +} diff --git a/src/tparse/tokenize.go b/src/tparse/tokenize.go index f5e3f54..31ae225 100644 --- a/src/tparse/tokenize.go +++ b/src/tparse/tokenize.go @@ -1,3 +1,19 @@ +/* + Copyright 2020 Kyle Gunger + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + package tparse import ( @@ -128,26 +144,23 @@ func stripBlockComments(t []Token) []Token { for _, tok := range t { if tok.Type == DELIMIT { + ch := ":" switch tok.Data { case ";#": - out = append(out, Token{DELIMIT, ";/", tok.Line, tok.Char}) - bc = true - continue + ch = ";" + fallthrough case ":#": - out = append(out, Token{DELIMIT, ":/", tok.Line, tok.Char}) - bc = true - continue + out = append(out, Token{DELIMIT, ch + "/", tok.Line, tok.Char}) + fallthrough case "/#": bc = true continue case "#;": - out = append(out, Token{DELIMIT, "/;", tok.Line, tok.Char}) - bc = false - continue + ch = ";" + fallthrough case "#:": - out = append(out, Token{DELIMIT, "/:", tok.Line, tok.Char}) - bc = false - continue + out = append(out, Token{DELIMIT, "/" + ch, tok.Line, tok.Char}) + fallthrough case "#/": bc = false continue @@ -162,6 +175,32 @@ func stripBlockComments(t []Token) []Token { return out } +func endsDef(toks *[]Token) bool { + for i := range *toks { + switch (*toks)[i].Data { + case ":", ";", "/;", "/:", "#;", "#:", ";;", "::": + return true + } + } + + return false +} + +func endsPre(toks *[]Token) bool { + o := false + + for i := range *toks { + switch (*toks)[i].Data { + case ":", "/:", "#:", "::": + o = true + case ";", "/;", "#;", ";;": + o = false + } + } + + return o +} + // TokenizeFile tries to read a file and turn it into a series of tokens func TokenizeFile(path string) []Token { out := []Token{} @@ -179,7 +218,7 @@ func TokenizeFile(path string) []Token { max := maxResRunes() ln, cn, last := int(1), int(-1), int(0) - sp := false + sp, pre := false, false for r := rune(' '); ; r, _, err = read.ReadRune() { cn++ @@ -195,7 +234,7 @@ func TokenizeFile(path string) []Token { if unicode.IsSpace(r) { sp = true if b.String() != "" { - out = append(out, Token{Type: checkToken(b.String()), Data: b.String(), Line: ln, Char: last}) + out = append(out, Token{Type: checkToken(b.String(), pre), Data: b.String(), Line: ln, Char: last}) b.Reset() } @@ -222,7 +261,7 @@ func TokenizeFile(path string) []Token { if r == '\'' { if b.String() != "" { - out = append(out, Token{Type: checkToken(b.String()), Data: b.String(), Line: ln, Char: last}) + out = append(out, Token{Type: checkToken(b.String(), pre), Data: b.String(), Line: ln, Char: last}) b.Reset() } @@ -235,7 +274,7 @@ func TokenizeFile(path string) []Token { if r == '"' { if b.String() != "" { - out = append(out, Token{Type: checkToken(b.String()), Data: b.String()}) + out = append(out, Token{Type: checkToken(b.String(), pre), Data: b.String()}) b.Reset() } @@ -249,7 +288,7 @@ func TokenizeFile(path string) []Token { // Checking for a rune group if checkResRune(r) != -1 { if b.String() != "" { - out = append(out, Token{Type: checkToken(b.String()), Data: b.String(), Line: ln, Char: last}) + out = append(out, Token{Type: checkToken(b.String(), pre), Data: b.String(), Line: ln, Char: last}) b.Reset() } last = cn @@ -284,6 +323,10 @@ func TokenizeFile(path string) []Token { sp = true + if endsDef(&rgs) { + pre = endsPre(&rgs) + } + continue } diff --git a/src/tparse/tree.go b/src/tparse/tree.go index 417580a..748ca90 100644 --- a/src/tparse/tree.go +++ b/src/tparse/tree.go @@ -1,95 +1,163 @@ +/* + Copyright 2020 Kyle Gunger + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + package tparse -// Node represents a group of nodes or a directive -type Node struct { - SubNodes []Node +import "fmt" + +// ID 9 = ast thing - Dir Directive +func errOut(message string, token Token) { + fmt.Println(message) + fmt.Println(token) + panic(token) } -// Directive represents a block or single directive -type Directive struct { - Type string - Data string +func tree(tokens *[]Token, tok, max int) (Node, int) { + out := Node{} - Param Paramaters -} + for ; tok < max; tok++ { + //t := (*tokens)[tok] + } -// Paramaters represents a set of paramaters for a directive -type Paramaters struct { - In []string - Out []string + return out, tok } -func handleCode(tokens *[]Token, start int) (Node, int) { +func parseList(tokens *[]Token, tok, max int) (Node, int) { out := Node{} + out.Data = Token{Type: 10, Data: "list"} + + for ; tok < max; tok++ { + //t := (*tokens)[tok] + } - return out, start + return out, tok } -func handleBlock(tokens *[]Token, start int) (Node, int) { - var out Node +func parseTypeList(tokens *[]Token, tok, max int) (Node, int) { + out := Node{} + out.Data = Token{Type: 10, Data: "list"} var tmp Node - l := len(*tokens) + for ; tok < max; tok++ { + t := (*tokens)[tok] - if start >= l { - panic((*tokens)[l-1]) + switch t.Data { + case ")", "]", "}": + return out, tok + case ",": + tok++ + default: + errOut("Error: unexpected token when parsing a list of types", t) + } + + tmp, tok = parseType(tokens, tok, max) + out.Sub = append(out.Sub, tmp) } - for ; start < l; start++ { - t := (*tokens)[start] + return out, tok +} + +func parseVoidType(tokens *[]Token, tok, max int) (Node, int) { + out := Node{} + + for ; tok < max; tok++ { + //t := (*tokens)[tok] + } + + return out, tok +} + +func parseType(tokens *[]Token, tok, max int) (Node, int) { + out := Node{} + working := &out + + for ; tok < max; tok++ { + t := (*tokens)[tok] switch t.Type { - case LINESEP: - if t.Data == ";" { - tmp, start = handleCode(tokens, start+1) + case AUGMENT: + if t.Data != "~" && t.Data != "`" { + errOut("Error: unexpected augment token when parsing type", t) } - break - case DELIMIT: - if t.Data == "/;" { - tmp, start = handleCode(tokens, start+1) + working.Data = t + + case KEYTYPE: + if t.Data == "void" { + *working, tok = parseVoidType(tokens, tok, max) + } else { + working.Data = t } - break + + return out, tok + + case DEFWORD: + if (*tokens)[tok+1].Data == "(" { + + } + + case KEYWORD: + if t.Data != "const" && t.Data != "volatile" { + errOut("Error: unexpected keyword when parsing type", t) + } + working.Data = t + default: - panic(t) + errOut("Error: unexpected token when parsing type", t) } - out.SubNodes = append(out.SubNodes, tmp) + working.Sub = append(working.Sub, Node{}) + working = &(working.Sub[0]) } - return out, start + return out, tok } -func handlePre(tokens *[]Token, start int) (Node, int) { +func parseValue(tokens *[]Token, tok, max int) (Node, int) { out := Node{} - return out, start + for ; tok < max; tok++ { + t := (*tokens)[tok] + switch t.Type { + case LITERAL: + case DEFWORD: + case DELIMIT: + } + } + + return out, tok } -// CreateTree takes a series of tokens and converts them into an AST -func CreateTree(tokens *[]Token, start int) Node { +// MakeTree creates an AST out of a set of tokens +func MakeTree(tokens *[]Token, file string) Node { out := Node{} - out.Dir = Directive{Type: "root"} + out.Data = Token{9, file, 0, 0} + out.Parent = &out - var tmp Node + tmp := Node{} + working := &tmp - for i, t := range *tokens { + for _, t := range *tokens { switch t.Type { case LINESEP: - if t.Data == ";" { - tmp, i = handleCode(tokens, i) - } else if t.Data == ":" { - tmp, i = handlePre(tokens, i) - } - break + case DELIMIT: - if t.Data == "/;" { - tmp, i = handleCode(tokens, i) - } else if t.Data == "/:" { - tmp, i = handlePre(tokens, i) - } - break + } - out.SubNodes = append(out.SubNodes, tmp) + tmp = Node{Data: t} + + working.Sub = append(working.Sub, tmp) } return out diff --git a/src/tparse/type.go b/src/tparse/type.go index 3e6ef50..cd4f536 100644 --- a/src/tparse/type.go +++ b/src/tparse/type.go @@ -1,12 +1,26 @@ -package tparse +/* + Copyright 2020 Kyle Gunger + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 -import () + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package tparse // LINESEP represents a line seperator const LINESEP = 0 -// ARGNSEP represents an inline seperator -const ARGNSEP = 1 +// INLNSEP represents an inline seperator +const INLNSEP = 1 // DELIMIT represents an opening or closing delimiter const DELIMIT = 2 @@ -29,24 +43,60 @@ const KEYWORD = 7 // DEFWORD represents a user-defined word such as a variable, method, or struct const DEFWORD = 8 -// RESWORD represents all the reserved words and what type of tokens they are. -var RESWORD = map[string]int{ - "import": PREWORD, +// PREWORDS represents all the pre-processor directives +var PREWORDS = []string{ + "include", + "define", + "extern", + "size", + "align", + "if", + "else", + "mark", +} - "bool": KEYTYPE, - "char": KEYTYPE, +func checkPreWord(s string) int { + for _, str := range PREWORDS { + if str == s { + return PREWORD + } + } - "int": KEYTYPE, - "float": KEYTYPE, + return -1 +} - "struct": KEYWORD, - "type": KEYWORD, +// RESWORD represents all the reserved words and what type of tokens they are. +var RESWORD = map[string]int{ + "bool": KEYTYPE, + "achar": KEYTYPE, + "uchar": KEYTYPE, + + "int8": KEYTYPE, + "int16": KEYTYPE, + "int32": KEYTYPE, + "int64": KEYTYPE, + "uint8": KEYTYPE, + "uint16": KEYTYPE, + "uint32": KEYTYPE, + "uint64": KEYTYPE, + + "float32": KEYTYPE, + "float64": KEYTYPE, + + "void": KEYTYPE, + "type": KEYTYPE, + + "struct": KEYWORD, + "interface": KEYWORD, + "enum": KEYWORD, + "is": KEYWORD, + "extends": KEYWORD, "loop": KEYWORD, "continue": KEYWORD, "break": KEYWORD, - "switch": KEYWORD, + "match": KEYWORD, "case": KEYWORD, "default": KEYWORD, @@ -60,10 +110,19 @@ var RESWORD = map[string]int{ "static": KEYWORD, "volatile": KEYWORD, + "method": KEYWORD, + "override": KEYWORD, + "self": KEYWORD, + "super": KEYWORD, + "operator": KEYWORD, + + "raw": KEYWORD, + "asm": KEYWORD, + "true": LITERAL, "false": LITERAL, - "null": LITERAL, + "delete": KEYWORD, } func checkResWord(s string) int { @@ -84,9 +143,9 @@ var RESRUNE = map[rune]int{ '[': DELIMIT, // Ending condition close ']': DELIMIT, - // Array mark open + // Array/set mark open '{': DELIMIT, - // Array mark close + // Array/set mark close '}': DELIMIT, // Start of pre-proc directive @@ -97,7 +156,7 @@ var RESRUNE = map[rune]int{ '#': LINESEP, // Seperate arguments or enclosed statements - ',': ARGNSEP, + ',': INLNSEP, // Assignment '=': AUGMENT, @@ -134,7 +193,7 @@ var RESRUNE = map[rune]int{ // Address of '~': AUGMENT, // De-ref - '_': AUGMENT, + '`': AUGMENT, } func checkResRune(r rune) int { @@ -181,14 +240,13 @@ var RESRUNES = map[string]int{ "&=": AUGMENT, "|=": AUGMENT, "^=": AUGMENT, - "!=": AUGMENT, "+=": AUGMENT, "-=": AUGMENT, "*=": AUGMENT, "/=": AUGMENT, "%=": AUGMENT, "~=": AUGMENT, - "_=": AUGMENT, + "`=": AUGMENT, // POSTaugmented augmentors "!&": AUGMENT, @@ -201,6 +259,10 @@ var RESRUNES = map[string]int{ "!<": AUGMENT, ">==": AUGMENT, "<==": AUGMENT, + + // Increment and De-increment + "++": AUGMENT, + "--": AUGMENT, } func maxResRunes() int { @@ -229,7 +291,7 @@ func checkRuneGroup(s string) int { return out } -func checkToken(s string) int { +func checkToken(s string, pre bool) int { rs := StringAsRunes(s) if len(rs) == 0 { @@ -245,6 +307,10 @@ func checkToken(s string) int { o := checkResWord(s) + if pre { + o = checkPreWord(s) + } + if o > -1 { return o } -- cgit v1.2.3