diff options
| author | Kyle Gunger <corechg@gmail.com> | 2020-06-28 14:30:45 -0400 | 
|---|---|---|
| committer | Kyle Gunger <corechg@gmail.com> | 2020-06-28 14:30:45 -0400 | 
| commit | 8f9cf0d4856bb53009bb58b53a42e21e2cd1e947 (patch) | |
| tree | b022091a0c3105e2da54b9dc16e5f55852b788f3 /src | |
[Initial parser] Upload existing
Diffstat (limited to 'src')
| -rw-r--r-- | src/main.go | 24 | ||||
| -rw-r--r-- | src/tparse/parse.go | 203 | ||||
| -rw-r--r-- | src/tparse/token.go | 13 | ||||
| -rw-r--r-- | src/tparse/type.go | 267 | 
4 files changed, 507 insertions, 0 deletions
| diff --git a/src/main.go b/src/main.go new file mode 100644 index 0000000..ec3b14b --- /dev/null +++ b/src/main.go @@ -0,0 +1,24 @@ +package main + +import "fmt" +import "tparse" +import "flag" +import "os" + +func main() { +	inputFile := flag.String("in", "", "The file to parse") +	outputFile := flag.String("out", "out.tnp", "The file to store the parse in") + +	flag.Parse() + +	fd, err := os.Create(*outputFile) + +	if err != nil { +		fmt.Println(err.Error()) +		return +	} + +	fd.WriteString(fmt.Sprint(tparse.ParseFile(*inputFile))) + +	fd.Close() +} diff --git a/src/tparse/parse.go b/src/tparse/parse.go new file mode 100644 index 0000000..e9e1ee6 --- /dev/null +++ b/src/tparse/parse.go @@ -0,0 +1,203 @@ +package tparse + +import ( +	"bufio" +	"io" +	"os" +	"strings" +	"unicode" +	"unicode/utf8" +) + +// Read in a number (may be a float) +func numericLiteral(r *bufio.Reader) Token { +	decimal := false +	run, _, err := r.ReadRune() + +	b := strings.Builder{} + +	for ; err == nil; run, _, err = r.ReadRune() { +		if (run == '.' || run == ',') && !decimal { +			decimal = true +		} else if !unicode.IsNumber(run) { +			break +		} +		b.WriteRune(run) +	} + +	r.UnreadRune() + +	return Token{Type: LITERAL, Data: b.String()} +} + +// Parse a string (will escape \" only in this stage) +func stringLiteral(r *bufio.Reader) Token { +	escape := false +	run, _, err := r.ReadRune() + +	if run != '"' { +		return Token{Type: LITERAL} +	} + +	b := strings.Builder{} + +	for ; err == nil; run, _, err = r.ReadRune() { +		b.WriteRune(run) +		if run == '\\' && !escape { +			escape = true +		} else if run == '"' && !escape { +			break +		} + +	} + +	return Token{Type: LITERAL, Data: b.String()} +} + +// Parse a character in (escape \\ or \') +func charLiteral(r *bufio.Reader) Token { +	escape := false +	run, _, err := r.ReadRune() + +	if run != '\'' { +		return Token{Type: LITERAL} +	} + +	b := strings.Builder{} + +	for ; err == nil; run, _, err = r.ReadRune() { +		b.WriteRune(run) +		if run == '\\' && !escape { +			escape = true +		} else if run == '\'' && !escape { +			break +		} + +	} + +	return Token{Type: LITERAL, Data: b.String()} +} + +// Split reserved runes into rune groups +func splitResRunes(str string, max int) []Token { +	out := []Token{} + +	rs := StringAsRunes(str) +	s, e := 0, max + +	if max > len(rs) { +		e = len(rs) +	} + +	for e <= len(rs) && s < len(rs) { +		if checkRuneGroup(RunesAsString(rs[s:e])) != -1 || e == s+1 { +			tmp := RunesAsString(rs[s:e]) +			out = append(out, Token{Type: checkRuneGroup(tmp), Data: tmp}) +			s = e +			if s+max < len(rs) { +				e = s + max +			} else { +				e = len(rs) +			} +		} else if e != s+1 { +			e-- +		} +	} + +	return out +} + +// ParseFile tries to read a file and turn it into a series of tokens +func ParseFile(path string) []Token { +	out := []Token{} + +	fd, err := os.Open(path) + +	if err != nil { +		return out +	} + +	read := bufio.NewReader(fd) + +	b := strings.Builder{} + +	max := maxResRunes() + +	for r := ' '; ; r, _, err = read.ReadRune() { +		// If error in stream or EOF, break +		if err != nil { +			if err != io.EOF { +				out = append(out, Token{Type: -1}) +			} +			break +		} + +		// Checking for a space +		if unicode.IsSpace(r) { +			if b.String() != "" { +				out = append(out, Token{Type: checkToken(b.String()), Data: b.String()}) +				b.Reset() +			} +			continue +		} + +		// Checking for a rune group +		if checkResRune(r) != -1 { +			if b.String() != "" { +				out = append(out, Token{Type: checkToken(b.String()), Data: b.String()}) +				b.Reset() +			} + +			for ; err == nil; r, _, err = read.ReadRune() { +				if checkResRune(r) == -1 { +					break +				} +				b.WriteRune(r) +			} + +			read.UnreadRune() + +			out = append(out, splitResRunes(b.String(), max)...) + +			b.Reset() + +			continue +		} + +		// Accumulate +		b.WriteRune(r) +	} + +	return out +} + +// StringAsRunes returns a string as a rune slice +func StringAsRunes(s string) []rune { +	out := []rune{} +	for i, j := 0, 0; i < len(s); i += j { +		r, w := utf8.DecodeRuneInString(s[i:]) +		out = append(out, r) +		j = w +	} +	return out +} + +// BytesAsRunes returns a byte slice as a rune slice +func BytesAsRunes(b []byte) []rune { +	out := []rune{} +	for i, j := 0, 0; i < len(b); i += j { +		r, w := utf8.DecodeRune(b[i:]) +		out = append(out, r) +		j = w +	} +	return out +} + +// RunesAsString returns a string from a slice of runes +func RunesAsString(rs []rune) string { +	b := strings.Builder{} +	for _, r := range rs { +		b.WriteRune(r) +	} +	return b.String() +} diff --git a/src/tparse/token.go b/src/tparse/token.go new file mode 100644 index 0000000..712b746 --- /dev/null +++ b/src/tparse/token.go @@ -0,0 +1,13 @@ +package tparse + +// Token represents a token in a program +type Token struct { +	Type int +	Data string +} + +// Container represents a container of data +type Container struct { +	Data  []interface{} +	Holds bool +} diff --git a/src/tparse/type.go b/src/tparse/type.go new file mode 100644 index 0000000..ee8b5e7 --- /dev/null +++ b/src/tparse/type.go @@ -0,0 +1,267 @@ +package tparse + +import () + +// LINESEP represents a line seperator +const LINESEP = 0 + +// ARGNSEP represents an inline seperator +const ARGNSEP = 1 + +// DELIMIT represents an opening or closing delimiter +const DELIMIT = 2 + +// AUGMENT represents an augmentation +const AUGMENT = 3 + +// LITERAL represents a literal value +const LITERAL = 4 + +// KEYTYPE represents a built in type +const KEYTYPE = 5 + +// PREWORD represents a reserved pre-processor directive +const PREWORD = 6 + +// KEYWORD represents a reserved word +const KEYWORD = 7 + +// DEFWORD represents a user-defined word such as a variable, method, or struct +const DEFWORD = 8 + +// RESWORD represents all the reserved words and what type of tokens they are. +var RESWORD = map[string]int{ +	"import": PREWORD, + +	"int":   KEYTYPE, +	"bool":  KEYTYPE, +	"float": KEYTYPE, +	"char":  KEYTYPE, + +	"struct": KEYWORD, +	"type":   KEYWORD, + +	"loop":     KEYWORD, +	"continue": KEYWORD, +	"break":    KEYWORD, + +	"switch":  KEYWORD, +	"case":    KEYWORD, +	"default": KEYWORD, + +	"label": KEYWORD, +	"goto":  KEYWORD, + +	"if":   KEYWORD, +	"else": KEYWORD, + +	"const":    KEYWORD, +	"static":   KEYWORD, +	"volatile": KEYWORD, + +	"true":  LITERAL, +	"false": LITERAL, + +	"null": LITERAL, +} + +func checkResWord(s string) int { +	out, prs := RESWORD[s] +	if !prs { +		return -1 +	} +	return out +} + +// RESRUNE represents all the reserved runes +var RESRUNE = map[rune]int{ +	// Starting condition open +	'(': DELIMIT, +	// Starting condition close +	')': DELIMIT, +	// Ending condition open +	'[': DELIMIT, +	// Ending condition close +	']': DELIMIT, +	// Array mark open +	'{': DELIMIT, +	// Array mark close +	'}': DELIMIT, +	// String literal +	'\'': DELIMIT, +	// String +	'"': DELIMIT, + +	// Start of pre-proc directive +	':': LINESEP, +	// Start of line +	';': LINESEP, +	// Start of comment +	'#': LINESEP, + +	// Seperate arguments +	',': ARGNSEP, + +	// Assignment +	'=': AUGMENT, + +	// Get +	'.': AUGMENT, + +	// Bitwise and +	'&': AUGMENT, +	// Bitwise or +	'|': AUGMENT, +	// Bitwise xor +	'^': AUGMENT, + +	// Greater than +	'>': AUGMENT, +	// Less than +	'<': AUGMENT, + +	// Not (prefix any bool or bitwise) +	'!': AUGMENT, + +	// Addition +	'+': AUGMENT, +	// Subtraction +	'-': AUGMENT, +	// Multiplication +	'*': AUGMENT, +	// Division +	'/': AUGMENT, +	// Mod +	'%': AUGMENT, + +	// Address of +	'~': AUGMENT, +	// De-ref +	'_': AUGMENT, +} + +func checkResRune(r rune) int { +	out, prs := RESRUNE[r] +	if !prs { +		return -1 +	} +	return out +} + +// RESRUNES Reserved sets of reserved runes which mean something +var RESRUNES = map[string]int{ +	// Pre-processor block +	"/:": DELIMIT, +	":/": DELIMIT, +	// Code block +	"/;": DELIMIT, +	";/": DELIMIT, +	// Comment block +	"/#": DELIMIT, +	"#/": DELIMIT, + +	// Quick chain +	"::": DELIMIT, +	":;": DELIMIT, +	":#": DELIMIT, + +	";;": DELIMIT, +	";:": DELIMIT, +	";#": DELIMIT, + +	"##": DELIMIT, +	"#:": DELIMIT, +	"#;": DELIMIT, + +	// Boolean equ +	"==": AUGMENT, +	// Boolean and +	"&&": AUGMENT, +	// Boolean or +	"||": AUGMENT, + +	// Bitwise l-shift +	"<<": AUGMENT, +	// Bitwise r-shift +	">>": AUGMENT, + +	// PREaugmented augmentors +	"&=": AUGMENT, +	"|=": AUGMENT, +	"^=": AUGMENT, +	"!=": AUGMENT, +	"+=": AUGMENT, +	"-=": AUGMENT, +	"*=": AUGMENT, +	"/=": AUGMENT, +	"%=": AUGMENT, +	"~=": AUGMENT, +	"_=": AUGMENT, + +	// POSTaugmented augmentors +	"!&":  AUGMENT, +	"!|":  AUGMENT, +	"!^":  AUGMENT, +	"!==": AUGMENT, +	"!&&": AUGMENT, +	"!||": AUGMENT, +	"!>":  AUGMENT, +	"!<":  AUGMENT, +	">==": AUGMENT, +	"<==": AUGMENT, +} + +func maxResRunes() int { +	max := 0 + +	for k := range RESRUNES { +		if len(k) > max { +			max = len(k) +		} +	} + +	return max +} + +func checkRuneGroup(s string) int { +	rs := StringAsRunes(s) + +	if len(rs) == 1 { +		return checkResRune(rs[0]) +	} + +	out, prs := RESRUNES[s] +	if !prs { +		return -1 +	} +	return out +} + +func checkToken(s string) int { +	rs := StringAsRunes(s) + +	if len(rs) == 0 { +		return -1 +	} + +	if len(rs) == 1 { +		o := checkResRune(rs[0]) +		if o > -1 { +			return o +		} +	} + +	o := checkResWord(s) + +	if o > -1 { +		return o +	} + +	o = checkRuneGroup(s) + +	if o > -1 { +		return o +	} + +	return DEFWORD +} |