From 8f9cf0d4856bb53009bb58b53a42e21e2cd1e947 Mon Sep 17 00:00:00 2001
From: Kyle Gunger <corechg@gmail.com>
Date: Sun, 28 Jun 2020 14:30:45 -0400
Subject: [Initial parser] Upload existing

---
 src/main.go         |  24 +++++
 src/tparse/parse.go | 203 +++++++++++++++++++++++++++++++++++++++
 src/tparse/token.go |  13 +++
 src/tparse/type.go  | 267 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 507 insertions(+)
 create mode 100644 src/main.go
 create mode 100644 src/tparse/parse.go
 create mode 100644 src/tparse/token.go
 create mode 100644 src/tparse/type.go

(limited to 'src')

diff --git a/src/main.go b/src/main.go
new file mode 100644
index 0000000..ec3b14b
--- /dev/null
+++ b/src/main.go
@@ -0,0 +1,24 @@
+package main
+
+import "fmt"
+import "tparse"
+import "flag"
+import "os"
+
+// main tokenizes the -in file into the -out file, printing create, write and close errors to stdout.
+func main() {
+	inputFile := flag.String("in", "", "The file to parse")
+	outputFile := flag.String("out", "out.tnp", "The file to store the parse in")
+	flag.Parse()
+	fd, err := os.Create(*outputFile)
+	if err != nil {
+		fmt.Println(err.Error())
+		return
+	}
+	if _, err = fd.WriteString(fmt.Sprint(tparse.ParseFile(*inputFile))); err != nil {
+		fmt.Println(err.Error())
+	}
+	if err = fd.Close(); err != nil {
+		fmt.Println(err.Error())
+	}
+}
diff --git a/src/tparse/parse.go b/src/tparse/parse.go
new file mode 100644
index 0000000..e9e1ee6
--- /dev/null
+++ b/src/tparse/parse.go
@@ -0,0 +1,203 @@
+package tparse
+
+import (
+	"bufio"
+	"io"
+	"os"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+// numericLiteral reads a number from r (may be a float: the first '.' or ',' is kept as a separator)
+func numericLiteral(r *bufio.Reader) Token {
+	var text strings.Builder
+	seenSeparator := false
+	for {
+		c, _, err := r.ReadRune()
+		if err != nil {
+			break
+		}
+		isSeparator := c == '.' || c == ','
+		if isSeparator && !seenSeparator {
+			seenSeparator = true
+		} else if !unicode.IsNumber(c) {
+			break
+		}
+		text.WriteRune(c)
+	}
+	r.UnreadRune()
+	return Token{Type: LITERAL, Data: text.String()}
+}
+
+// stringLiteral reads a double-quoted string from r and returns it as a LITERAL, quotes included.
+// A backslash escapes only the rune after it; a first rune other than '"' gives a LITERAL with no data.
+func stringLiteral(r *bufio.Reader) Token {
+	run, _, err := r.ReadRune()
+	if run != '"' {
+		return Token{Type: LITERAL}
+	}
+	b := strings.Builder{}
+	b.WriteRune(run)
+	escape := false
+	// Start scanning after the opening quote so it is not taken for the closing one.
+	for run, _, err = r.ReadRune(); err == nil; run, _, err = r.ReadRune() {
+		b.WriteRune(run)
+		if escape {
+			escape = false
+		} else if run == '\\' {
+			escape = true
+		} else if run == '"' {
+			break
+		}
+	}
+	return Token{Type: LITERAL, Data: b.String()}
+}
+
+// charLiteral reads a single-quoted character from r and returns it as a LITERAL, quotes included.
+// A backslash escapes only the rune after it; a first rune other than '\'' gives a LITERAL with no data.
+func charLiteral(r *bufio.Reader) Token {
+	run, _, err := r.ReadRune()
+	if run != '\'' {
+		return Token{Type: LITERAL}
+	}
+	b := strings.Builder{}
+	b.WriteRune(run)
+	escape := false
+	// Start scanning after the opening quote so it is not taken for the closing one.
+	for run, _, err = r.ReadRune(); err == nil; run, _, err = r.ReadRune() {
+		b.WriteRune(run)
+		if escape {
+			escape = false
+		} else if run == '\\' {
+			escape = true
+		} else if run == '\'' {
+			break
+		}
+	}
+	return Token{Type: LITERAL, Data: b.String()}
+}
+
+// splitResRunes cuts str, a run of reserved runes, into tokens by greedy longest match:
+// at each position the widest window of at most max runes accepted by checkRuneGroup
+// becomes a token, falling back to a single rune typed by whatever checkRuneGroup reports.
+func splitResRunes(str string, max int) []Token {
+	tokens := []Token{}
+	runes := StringAsRunes(str)
+	start, end := 0, max
+	if end > len(runes) {
+		end = len(runes)
+	}
+	for end <= len(runes) && start < len(runes) {
+		group := RunesAsString(runes[start:end])
+		kind := checkRuneGroup(group)
+		if kind == -1 && end != start+1 {
+			// No match yet and still wider than one rune: shrink the window.
+			end--
+			continue
+		}
+		tokens = append(tokens, Token{Type: kind, Data: group})
+		// Restart with a full-width window, clamped to the end of the input.
+		start = end
+		end = start + max
+		if end > len(runes) {
+			end = len(runes)
+		}
+	}
+	return tokens
+}
+
+// ParseFile reads the file at path and splits it into a flat list of tokens.
+// Whitespace separates words and every run of reserved runes is split by
+// splitResRunes. A file that cannot be opened yields an empty list, and a
+// read error other than io.EOF appends a Token with Type -1.
+func ParseFile(path string) []Token {
+	out := []Token{}
+	fd, err := os.Open(path)
+	if err != nil {
+		return out
+	}
+	// The file is only read, so the error from Close is not checked.
+	defer fd.Close()
+	read := bufio.NewReader(fd)
+	b := strings.Builder{}
+	max := maxResRunes()
+
+	// flush emits the accumulated word, if any, as a single token.
+	flush := func() {
+		if b.Len() > 0 {
+			out = append(out, Token{Type: checkToken(b.String()), Data: b.String()})
+			b.Reset()
+		}
+	}
+
+	// r starts as a space so the first pass does nothing and every real rune
+	// comes from the post statement.
+	for r := ' '; ; r, _, err = read.ReadRune() {
+		if err != nil {
+			// End of input or read error: emit the pending word instead of dropping it.
+			flush()
+			if err != io.EOF {
+				out = append(out, Token{Type: -1})
+			}
+			break
+		}
+
+		// Whitespace ends the current word.
+		if unicode.IsSpace(r) {
+			flush()
+			continue
+		}
+
+		// A reserved rune ends the current word and starts a run of reserved runes.
+		if checkResRune(r) != -1 {
+			flush()
+			for ; err == nil; r, _, err = read.ReadRune() {
+				if checkResRune(r) == -1 {
+					break
+				}
+				b.WriteRune(r)
+			}
+			// Give the rune that ended the run back to the outer loop.
+			read.UnreadRune()
+			out = append(out, splitResRunes(b.String(), max)...)
+			b.Reset()
+			continue
+		}
+
+		// Accumulate
+		b.WriteRune(r)
+	}
+	return out
+}
+
+// StringAsRunes returns a string as a rune slice
+// (decoded as UTF-8; each invalid byte becomes utf8.RuneError).
+func StringAsRunes(s string) []rune {
+	// Sized up front; the result is empty but non-nil for an empty string.
+	out := make([]rune, 0, utf8.RuneCountInString(s))
+	for _, r := range s {
+		out = append(out, r)
+	}
+	return out
+}
+
+// BytesAsRunes decodes b as UTF-8 and returns its runes (utf8.RuneError for each invalid byte)
+func BytesAsRunes(b []byte) []rune {
+	out := []rune{}
+	for rest := b; len(rest) > 0; {
+		r, w := utf8.DecodeRune(rest)
+		out = append(out, r)
+		rest = rest[w:]
+	}
+	return out
+}
+
+// RunesAsString returns a string from a slice of runes
+// (each rune is appended UTF-8 encoded, in order).
+func RunesAsString(rs []rune) string {
+	var sb strings.Builder
+	for i := range rs {
+		sb.WriteRune(rs[i])
+	}
+	return sb.String()
+}
diff --git a/src/tparse/token.go b/src/tparse/token.go
new file mode 100644
index 0000000..712b746
--- /dev/null
+++ b/src/tparse/token.go
@@ -0,0 +1,13 @@
+package tparse
+
+// Token represents a token in a program: a type (Type) and the token's text (Data).
+type Token struct {
+	Type int    // token type: one of the constants in type.go, or -1 when unknown
+	Data string // text of the token
+}
+
+// Container represents a container of data
+type Container struct {
+	Data  []interface{} // contained values; NOTE(review): element types are not fixed by this file
+	Holds bool          // NOTE(review): meaning not shown in this file; confirm with the code that uses it
+}
diff --git a/src/tparse/type.go b/src/tparse/type.go
new file mode 100644
index 0000000..ee8b5e7
--- /dev/null
+++ b/src/tparse/type.go
@@ -0,0 +1,267 @@
+package tparse
+
+import ()
+
+// Token types, as stored in Token.Type.
+const (
+	// LINESEP represents a line separator
+	LINESEP = 0
+	// ARGNSEP represents an inline separator
+	ARGNSEP = 1
+
+	// DELIMIT represents an opening or closing delimiter
+	DELIMIT = 2
+	// AUGMENT represents an augmentation
+	AUGMENT = 3
+
+	// LITERAL represents a literal value
+	LITERAL = 4
+
+	// KEYTYPE represents a built in type
+	KEYTYPE = 5
+
+	// PREWORD represents a reserved pre-processor directive
+	PREWORD = 6
+	// KEYWORD represents a reserved word
+	KEYWORD = 7
+
+	// DEFWORD represents a user-defined word such as a variable, method, or struct
+	DEFWORD = 8
+)
+
+// RESWORD maps every reserved word to its token type (PREWORD, KEYTYPE, KEYWORD or LITERAL).
+var RESWORD = map[string]int{
+	"import": PREWORD,
+
+	"int":   KEYTYPE,
+	"bool":  KEYTYPE,
+	"float": KEYTYPE,
+	"char":  KEYTYPE,
+
+	"struct": KEYWORD,
+	"type":   KEYWORD,
+
+	"loop":     KEYWORD,
+	"continue": KEYWORD,
+	"break":    KEYWORD,
+
+	"switch":  KEYWORD,
+	"case":    KEYWORD,
+	"default": KEYWORD,
+
+	"label": KEYWORD,
+	"goto":  KEYWORD,
+
+	"if":   KEYWORD,
+	"else": KEYWORD,
+
+	"const":    KEYWORD,
+	"static":   KEYWORD,
+	"volatile": KEYWORD,
+
+	"true":  LITERAL,
+	"false": LITERAL,
+
+	"null": LITERAL,
+}
+
+// checkResWord returns the token type RESWORD holds for s, or -1 if s is not reserved.
+func checkResWord(s string) int {
+	if kind, ok := RESWORD[s]; ok {
+		return kind
+	}
+	return -1
+}
+
+// RESRUNE maps each reserved rune to its token type
+var RESRUNE = map[rune]int{
+	// Starting condition open
+	'(': DELIMIT,
+	// Starting condition close
+	')': DELIMIT,
+	// Ending condition open
+	'[': DELIMIT,
+	// Ending condition close
+	']': DELIMIT,
+	// Array mark open
+	'{': DELIMIT,
+	// Array mark close
+	'}': DELIMIT,
+	// Character literal quote
+	'\'': DELIMIT,
+	// String literal quote
+	'"': DELIMIT,
+
+	// Start of pre-proc directive
+	':': LINESEP,
+	// Start of line
+	';': LINESEP,
+	// Start of comment
+	'#': LINESEP,
+
+	// Separate arguments
+	',': ARGNSEP,
+
+	// Assignment
+	'=': AUGMENT,
+
+	// Get
+	'.': AUGMENT,
+
+	// Bitwise and
+	'&': AUGMENT,
+	// Bitwise or
+	'|': AUGMENT,
+	// Bitwise xor
+	'^': AUGMENT,
+
+	// Greater than
+	'>': AUGMENT,
+	// Less than
+	'<': AUGMENT,
+
+	// Not (prefix any bool or bitwise)
+	'!': AUGMENT,
+
+	// Addition
+	'+': AUGMENT,
+	// Subtraction
+	'-': AUGMENT,
+	// Multiplication
+	'*': AUGMENT,
+	// Division
+	'/': AUGMENT,
+	// Mod
+	'%': AUGMENT,
+
+	// Address of
+	'~': AUGMENT,
+	// De-ref
+	'_': AUGMENT,
+}
+
+// checkResRune returns the token type RESRUNE holds for r, or -1 if r is not reserved.
+func checkResRune(r rune) int {
+	if kind, ok := RESRUNE[r]; ok {
+		return kind
+	}
+	return -1
+}
+
+// RESRUNES maps each reserved group of two or more reserved runes to its token type
+var RESRUNES = map[string]int{
+	// Pre-processor block
+	"/:": DELIMIT,
+	":/": DELIMIT,
+	// Code block
+	"/;": DELIMIT,
+	";/": DELIMIT,
+	// Comment block
+	"/#": DELIMIT,
+	"#/": DELIMIT,
+
+	// Quick chain
+	"::": DELIMIT,
+	":;": DELIMIT,
+	":#": DELIMIT,
+
+	";;": DELIMIT,
+	";:": DELIMIT,
+	";#": DELIMIT,
+
+	"##": DELIMIT,
+	"#:": DELIMIT,
+	"#;": DELIMIT,
+
+	// Boolean equality
+	"==": AUGMENT,
+	// Boolean and
+	"&&": AUGMENT,
+	// Boolean or
+	"||": AUGMENT,
+
+	// Bitwise l-shift
+	"<<": AUGMENT,
+	// Bitwise r-shift
+	">>": AUGMENT,
+
+	// PREaugmented augmentors
+	"&=": AUGMENT,
+	"|=": AUGMENT,
+	"^=": AUGMENT,
+	"!=": AUGMENT,
+	"+=": AUGMENT,
+	"-=": AUGMENT,
+	"*=": AUGMENT,
+	"/=": AUGMENT,
+	"%=": AUGMENT,
+	"~=": AUGMENT,
+	"_=": AUGMENT,
+
+	// POSTaugmented augmentors
+	"!&":  AUGMENT,
+	"!|":  AUGMENT,
+	"!^":  AUGMENT,
+	"!==": AUGMENT,
+	"!&&": AUGMENT,
+	"!||": AUGMENT,
+	"!>":  AUGMENT,
+	"!<":  AUGMENT,
+	">==": AUGMENT,
+	"<==": AUGMENT,
+}
+
+// maxResRunes returns the length, in runes, of the longest key in RESRUNES.
+// Runes are counted (not bytes) because splitResRunes uses the result as a window over a rune slice.
+func maxResRunes() int {
+	max := 0
+	for k := range RESRUNES {
+		if n := len([]rune(k)); n > max {
+			max = n
+		}
+	}
+	return max
+}
+
+// checkRuneGroup returns the token type of a group of reserved runes, or -1.
+// A group of exactly one rune is looked up with checkResRune; anything else
+// (including the empty string) is looked up in RESRUNES.
+func checkRuneGroup(s string) int {
+	if runes := StringAsRunes(s); len(runes) == 1 {
+		return checkResRune(runes[0])
+	}
+
+	if kind, ok := RESRUNES[s]; ok {
+		return kind
+	}
+	return -1
+}
+
+// checkToken classifies the word s and returns its token type.
+//
+// The checks run in this order and the first hit wins:
+//   - the empty string is -1
+//   - a single reserved rune gets its RESRUNE type
+//   - a reserved word gets its RESWORD type
+//   - a reserved rune group gets its RESRUNES type
+//   - everything else is a DEFWORD
+func checkToken(s string) int {
+	runes := StringAsRunes(s)
+
+	switch len(runes) {
+	case 0:
+		return -1
+	case 1:
+		if kind := checkResRune(runes[0]); kind > -1 {
+			return kind
+		}
+	}
+
+	if kind := checkResWord(s); kind > -1 {
+		return kind
+	}
+	if kind := checkRuneGroup(s); kind > -1 {
+		return kind
+	}
+	return DEFWORD
+}
-- 
cgit v1.2.3