summaryrefslogtreecommitdiff
path: root/src/tparse
diff options
context:
space:
mode:
author: Kyle Gunger <corechg@gmail.com> 2020-06-28 14:30:45 -0400
committer: Kyle Gunger <corechg@gmail.com> 2020-06-28 14:30:45 -0400
commit: 8f9cf0d4856bb53009bb58b53a42e21e2cd1e947 (patch)
tree: b022091a0c3105e2da54b9dc16e5f55852b788f3 /src/tparse
[Initial parser] Upload existing
Diffstat (limited to 'src/tparse')
-rw-r--r-- src/tparse/parse.go 203
-rw-r--r-- src/tparse/token.go 13
-rw-r--r-- src/tparse/type.go 267
3 files changed, 483 insertions, 0 deletions
diff --git a/src/tparse/parse.go b/src/tparse/parse.go
new file mode 100644
index 0000000..e9e1ee6
--- /dev/null
+++ b/src/tparse/parse.go
@@ -0,0 +1,203 @@
+package tparse
+
+import (
+ "bufio"
+ "io"
+ "os"
+ "strings"
+ "unicode"
+ "unicode/utf8"
+)
+
+// Read in a number (may be a float)
+func numericLiteral(r *bufio.Reader) Token {
+ decimal := false
+ run, _, err := r.ReadRune()
+
+ b := strings.Builder{}
+
+ for ; err == nil; run, _, err = r.ReadRune() {
+ if (run == '.' || run == ',') && !decimal {
+ decimal = true
+ } else if !unicode.IsNumber(run) {
+ break
+ }
+ b.WriteRune(run)
+ }
+
+ r.UnreadRune()
+
+ return Token{Type: LITERAL, Data: b.String()}
+}
+
+// Parse a string (will escape \" only in this stage)
+func stringLiteral(r *bufio.Reader) Token {
+ escape := false
+ run, _, err := r.ReadRune()
+
+ if run != '"' {
+ return Token{Type: LITERAL}
+ }
+
+ b := strings.Builder{}
+
+ for ; err == nil; run, _, err = r.ReadRune() {
+ b.WriteRune(run)
+ if run == '\\' && !escape {
+ escape = true
+ } else if run == '"' && !escape {
+ break
+ }
+
+ }
+
+ return Token{Type: LITERAL, Data: b.String()}
+}
+
+// Parse a character in (escape \\ or \')
+func charLiteral(r *bufio.Reader) Token {
+ escape := false
+ run, _, err := r.ReadRune()
+
+ if run != '\'' {
+ return Token{Type: LITERAL}
+ }
+
+ b := strings.Builder{}
+
+ for ; err == nil; run, _, err = r.ReadRune() {
+ b.WriteRune(run)
+ if run == '\\' && !escape {
+ escape = true
+ } else if run == '\'' && !escape {
+ break
+ }
+
+ }
+
+ return Token{Type: LITERAL, Data: b.String()}
+}
+
+// splitResRunes splits a run of reserved runes into tokens, always taking the
+// longest known group first.
+//
+// str is the run to split and max is the largest group length, in runes, to
+// try. At each position the window starts max runes wide and shrinks until
+// checkRuneGroup recognises it or only one rune is left. A single rune is
+// always emitted, with whatever type checkRuneGroup reports for it (which may
+// be -1). Values of max below 1 are treated as 1.
+func splitResRunes(str string, max int) []Token {
+	out := []Token{}
+
+	rs := []rune(str)
+
+	// A window narrower than one rune can never match, and shrinking it
+	// further would move its end before its start.
+	if max < 1 {
+		max = 1
+	}
+
+	for start := 0; start < len(rs); {
+		end := start + max
+		if end > len(rs) {
+			end = len(rs)
+		}
+
+		group := string(rs[start:end])
+		kind := checkRuneGroup(group)
+		for kind == -1 && end > start+1 {
+			end--
+			group = string(rs[start:end])
+			kind = checkRuneGroup(group)
+		}
+
+		out = append(out, Token{Type: kind, Data: group})
+		start = end
+	}
+
+	return out
+}
+
+// ParseFile tries to read a file and turn it into a series of tokens
+func ParseFile(path string) []Token {
+ out := []Token{}
+
+ fd, err := os.Open(path)
+
+ if err != nil {
+ return out
+ }
+
+ read := bufio.NewReader(fd)
+
+ b := strings.Builder{}
+
+ max := maxResRunes()
+
+ for r := ' '; ; r, _, err = read.ReadRune() {
+ // If error in stream or EOF, break
+ if err != nil {
+ if err != io.EOF {
+ out = append(out, Token{Type: -1})
+ }
+ break
+ }
+
+ // Checking for a space
+ if unicode.IsSpace(r) {
+ if b.String() != "" {
+ out = append(out, Token{Type: checkToken(b.String()), Data: b.String()})
+ b.Reset()
+ }
+ continue
+ }
+
+ // Checking for a rune group
+ if checkResRune(r) != -1 {
+ if b.String() != "" {
+ out = append(out, Token{Type: checkToken(b.String()), Data: b.String()})
+ b.Reset()
+ }
+
+ for ; err == nil; r, _, err = read.ReadRune() {
+ if checkResRune(r) == -1 {
+ break
+ }
+ b.WriteRune(r)
+ }
+
+ read.UnreadRune()
+
+ out = append(out, splitResRunes(b.String(), max)...)
+
+ b.Reset()
+
+ continue
+ }
+
+ // Accumulate
+ b.WriteRune(r)
+ }
+
+ return out
+}
+
+// StringAsRunes decodes s as UTF-8 and returns its runes in order. Each byte
+// that is not part of a valid encoding yields utf8.RuneError. The result is
+// never nil.
+func StringAsRunes(s string) []rune {
+	runes := make([]rune, 0, utf8.RuneCountInString(s))
+	for _, r := range s {
+		runes = append(runes, r)
+	}
+	return runes
+}
+
+// BytesAsRunes decodes b as UTF-8 and returns its runes in order. Each byte
+// that is not part of a valid encoding yields utf8.RuneError. The result is
+// never nil.
+func BytesAsRunes(b []byte) []rune {
+	runes := make([]rune, 0, utf8.RuneCount(b))
+	for rest := b; len(rest) > 0; {
+		r, width := utf8.DecodeRune(rest)
+		runes = append(runes, r)
+		rest = rest[width:]
+	}
+	return runes
+}
+
+// RunesAsString returns the UTF-8 string made of the runes in rs, in order.
+// Runes that are not valid Unicode code points are encoded as U+FFFD.
+func RunesAsString(rs []rune) string {
+	return string(rs)
+}
diff --git a/src/tparse/token.go b/src/tparse/token.go
new file mode 100644
index 0000000..712b746
--- /dev/null
+++ b/src/tparse/token.go
@@ -0,0 +1,13 @@
+package tparse
+
+// Token represents a single lexical token in a program.
+type Token struct {
+ Type int // token kind: one of the LINESEP..DEFWORD constants, or -1 when unrecognised
+ Data string // source text of the token
+}
+
+// Container represents a container holding a list of arbitrary values.
+type Container struct {
+ Data []interface{} // contained values, of any type
+ Holds bool // NOTE(review): meaning is not shown in this file; confirm with the author
+}
diff --git a/src/tparse/type.go b/src/tparse/type.go
new file mode 100644
index 0000000..ee8b5e7
--- /dev/null
+++ b/src/tparse/type.go
@@ -0,0 +1,267 @@
+package tparse
+
+import ()
+
+// Token type identifiers, numbered 0 through 8 in declaration order.
+const (
+	// LINESEP represents a line separator
+	LINESEP = iota
+	// ARGNSEP represents an inline separator
+	ARGNSEP
+	// DELIMIT represents an opening or closing delimiter
+	DELIMIT
+	// AUGMENT represents an augmentation
+	AUGMENT
+	// LITERAL represents a literal value
+	LITERAL
+	// KEYTYPE represents a built in type
+	KEYTYPE
+	// PREWORD represents a reserved pre-processor directive
+	PREWORD
+	// KEYWORD represents a reserved word
+	KEYWORD
+	// DEFWORD represents a user-defined word such as a variable, method, or struct
+	DEFWORD
+)
+
+// RESWORD maps each reserved word to the type of token it produces.
+var RESWORD = map[string]int{
+	// Literal values
+	"true":  LITERAL,
+	"false": LITERAL,
+	"null":  LITERAL,
+
+	// Built in types
+	"int":   KEYTYPE,
+	"bool":  KEYTYPE,
+	"float": KEYTYPE,
+	"char":  KEYTYPE,
+
+	// Pre-processor directives
+	"import": PREWORD,
+
+	// Type definitions
+	"struct": KEYWORD,
+	"type":   KEYWORD,
+
+	// Qualifiers
+	"const":    KEYWORD,
+	"static":   KEYWORD,
+	"volatile": KEYWORD,
+
+	// Branching
+	"if":   KEYWORD,
+	"else": KEYWORD,
+
+	"switch":  KEYWORD,
+	"case":    KEYWORD,
+	"default": KEYWORD,
+
+	// Loops
+	"loop":     KEYWORD,
+	"continue": KEYWORD,
+	"break":    KEYWORD,
+
+	// Jumps
+	"label": KEYWORD,
+	"goto":  KEYWORD,
+}
+
+// checkResWord returns the token type RESWORD holds for s, or -1 when s is
+// not a reserved word.
+func checkResWord(s string) int {
+	if kind, ok := RESWORD[s]; ok {
+		return kind
+	}
+	return -1
+}
+
+// RESRUNE maps every reserved rune to the type of token it forms on its own.
+var RESRUNE = map[rune]int{
+	// Delimiters
+	'(': DELIMIT, // starting condition open
+	')': DELIMIT, // starting condition close
+	'[': DELIMIT, // ending condition open
+	']': DELIMIT, // ending condition close
+	'{': DELIMIT, // array mark open
+	'}': DELIMIT, // array mark close
+
+	// Quotes
+	'\'': DELIMIT, // character literal
+	'"':  DELIMIT, // string
+
+	// Line separators
+	':': LINESEP, // start of pre-proc directive
+	';': LINESEP, // start of line
+	'#': LINESEP, // start of comment
+
+	// Argument separator
+	',': ARGNSEP, // separate arguments
+
+	// Augmentations
+	'=': AUGMENT, // assignment
+	'.': AUGMENT, // get
+	'&': AUGMENT, // bitwise and
+	'|': AUGMENT, // bitwise or
+	'^': AUGMENT, // bitwise xor
+	'>': AUGMENT, // greater than
+	'<': AUGMENT, // less than
+	'!': AUGMENT, // not (prefix any bool or bitwise)
+	'+': AUGMENT, // addition
+	'-': AUGMENT, // subtraction
+	'*': AUGMENT, // multiplication
+	'/': AUGMENT, // division
+	'%': AUGMENT, // mod
+	'~': AUGMENT, // address of
+	'_': AUGMENT, // de-ref
+}
+
+// checkResRune returns the token type RESRUNE holds for r, or -1 when r is
+// not a reserved rune.
+func checkResRune(r rune) int {
+	if kind, ok := RESRUNE[r]; ok {
+		return kind
+	}
+	return -1
+}
+
+// RESRUNES maps each reserved group of two or more reserved runes to the type
+// of token the group produces.
+var RESRUNES = map[string]int{
+	// Block delimiters: pre-processor, code, comment
+	"/:": DELIMIT,
+	":/": DELIMIT,
+	"/;": DELIMIT,
+	";/": DELIMIT,
+	"/#": DELIMIT,
+	"#/": DELIMIT,
+
+	// Quick chain
+	"::": DELIMIT,
+	":;": DELIMIT,
+	":#": DELIMIT,
+	";;": DELIMIT,
+	";:": DELIMIT,
+	";#": DELIMIT,
+	"##": DELIMIT,
+	"#:": DELIMIT,
+	"#;": DELIMIT,
+
+	// Boolean equ, and, or
+	"==": AUGMENT,
+	"&&": AUGMENT,
+	"||": AUGMENT,
+
+	// Bitwise l-shift and r-shift
+	"<<": AUGMENT,
+	">>": AUGMENT,
+
+	// PREaugmented augmentors
+	"&=": AUGMENT,
+	"|=": AUGMENT,
+	"^=": AUGMENT,
+	"!=": AUGMENT,
+	"+=": AUGMENT,
+	"-=": AUGMENT,
+	"*=": AUGMENT,
+	"/=": AUGMENT,
+	"%=": AUGMENT,
+	"~=": AUGMENT,
+	"_=": AUGMENT,
+
+	// POSTaugmented augmentors, two runes
+	"!&": AUGMENT,
+	"!|": AUGMENT,
+	"!^": AUGMENT,
+	"!>": AUGMENT,
+	"!<": AUGMENT,
+
+	// POSTaugmented augmentors, three runes
+	"!==": AUGMENT,
+	"!&&": AUGMENT,
+	"!||": AUGMENT,
+	">==": AUGMENT,
+	"<==": AUGMENT,
+}
+
+// maxResRunes returns the length, in bytes, of the longest key in RESRUNES.
+// It returns 0 when RESRUNES is empty.
+func maxResRunes() int {
+	longest := 0
+	for group := range RESRUNES {
+		if n := len(group); n > longest {
+			longest = n
+		}
+	}
+	return longest
+}
+
+// checkRuneGroup returns the token type for a group of reserved runes. A
+// group of exactly one rune is looked up with checkResRune; any other group
+// is looked up in RESRUNES. It returns -1 when the group is not reserved.
+func checkRuneGroup(s string) int {
+	if rs := StringAsRunes(s); len(rs) == 1 {
+		return checkResRune(rs[0])
+	}
+
+	if kind, ok := RESRUNES[s]; ok {
+		return kind
+	}
+	return -1
+}
+
+// checkToken classifies s and returns its token type.
+//
+// The empty string yields -1. Otherwise s is tried, in order, as a single
+// reserved rune, a reserved word and a reserved rune group; the first match
+// decides the type. Anything that matches none of them is a DEFWORD.
+func checkToken(s string) int {
+	rs := StringAsRunes(s)
+
+	switch len(rs) {
+	case 0:
+		return -1
+	case 1:
+		if kind := checkResRune(rs[0]); kind > -1 {
+			return kind
+		}
+	}
+
+	if kind := checkResWord(s); kind > -1 {
+		return kind
+	}
+
+	if kind := checkRuneGroup(s); kind > -1 {
+		return kind
+	}
+
+	return DEFWORD
+}