summary refs log tree commit diff
diff options
context:
space:
mode:
author Kyle Gunger <kgunger12@gmail.com> 2021-08-30 19:07:26 -0400
committer Kyle Gunger <kgunger12@gmail.com> 2021-08-30 19:07:26 -0400
commit ea5ef2fe245c09b35c783977928d6e995110cfb4 (patch)
tree e25f71adb433bba34b10c3a013d8b10c24f159e3
parent 628dd83397c47ff484f7c81b06dcd6d1e4af628b (diff)
Scrap old spec, add initial value parsing
-rw-r--r-- .gitignore 2
-rwxr-xr-x [-rw-r--r--] gobuild.sh 4
-rw-r--r-- small-tests/examp.tnsl 14
-rw-r--r-- spec/compiler/compiler.txt 23
-rw-r--r-- spec/compiler/pre-processor.txt 9
-rw-r--r-- spec/compiler/tokenizer.txt 0
-rw-r--r-- spec/iex/iex-spec.txt 128
-rw-r--r-- spec/libts.txt 93
-rw-r--r-- spec/spec.txt 928
-rw-r--r-- src/tparse/tree-preproc.go 49
-rw-r--r-- src/tparse/tree-value.go 138
-rw-r--r-- src/tparse/tree.go 4
-rw-r--r-- src/tparse/type.go 5
13 files changed, 189 insertions, 1208 deletions
diff --git a/.gitignore b/.gitignore
index a2ce39d..ce0e4f7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,4 @@ build/
.vscode/
*-test.tnp
-*-test.tnt \ No newline at end of file
+*.tnt \ No newline at end of file
diff --git a/gobuild.sh b/gobuild.sh
index daa0a55..e85b718 100644..100755
--- a/gobuild.sh
+++ b/gobuild.sh
@@ -2,7 +2,7 @@
SRCDIR=$(pwd)
-GOPATH="$GOPATH:$SRCDIR"
-GO111MODULE=off
+export GOPATH="$GOPATH:$SRCDIR"
+export GO111MODULE="off"
go build -o build/${1} src/${1}.go
diff --git a/small-tests/examp.tnsl b/small-tests/examp.tnsl
index 0c32f3f..196db40 100644
--- a/small-tests/examp.tnsl
+++ b/small-tests/examp.tnsl
@@ -1,6 +1,6 @@
#Comment like this
-/##
+/#
Or like this (blocks begin with /<symbol> and end with <symbol>/)
Block Comment
#/
@@ -9,8 +9,8 @@
#/
# Preprocessor directives are like this
-# Import from library using '
-:import 'what/what.tnsl' a
+# Import an external module from library using '
+:import 'what'
# Import from local file using "
:import "what/what.tnsl"
@@ -65,11 +65,11 @@
-# The struct keyword is followed by [name] {values}
-;struct [s1] {string Name, string Message = "Default message (c-style strings)"}
+# The struct keyword is followed by <name> {values}
+;struct s1 {string Name, string Message = "Default message (c-style strings)"}
# Most people should declare as such:
-;struct [s1] {
+;struct s1 {
string Name,
string Message = "Default message (c-style strings)"
}
@@ -156,7 +156,7 @@
# Dumb generic type struct
-; struct [gen] (type T) {
+; struct gen (type T) {
T i
}
diff --git a/spec/compiler/compiler.txt b/spec/compiler/compiler.txt
deleted file mode 100644
index df7a591..0000000
--- a/spec/compiler/compiler.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-The "compiler" of the TNSL language is formed of several parts, each discussed
-in their own file.
-
- 1) Parsing
-
- 1a) The tokenizer - chews up .tnsl files and spits out tokens
- 1b) The file resolver - interprets import commands and creates a tree of files to compile
- 1c) The definer -
- 1d) The AST generator - indexes tokens and provides some rudimentary syntax checking
-
- 2) Validation
-
- 2a) The expander - expands pre-processor commands and macro operations
- 2b) The validator - checks all requirements for all functions and expressions are valid
-
- 3) Translation
-
- 3a) Variable resolver - chooses and creates an internal model of where variables are on the stack, as well as calling conventions for functions
- 3b) Type translator - places and translates types (internal and external)
- 3c) Container resolver - checks container (elf, exe, dll, app, out, etc.) and decides how to map the types for future linking
- 3d) Final translation - translates all logic and expressions into native binary code
- 3e) Optimization
- 3f) Container Writer - writes the final output of the previous steps into the specified container
diff --git a/spec/compiler/pre-processor.txt b/spec/compiler/pre-processor.txt
deleted file mode 100644
index 8297e7d..0000000
--- a/spec/compiler/pre-processor.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-A set of components in the compiler (all before the translator) which expand,
-resolve, and validate pre-processor directives into code which can be compiled
-
-Consists of the resolver in the parser.
-The expander and part of the indexer in the validator.
-
-Special (compiler specific) definitions can be added with _<compiler>_<variable name>
-
-E.X. if the official implementation wanted to provide the exact time of the compilation, it could use _TNSLC_TIME_NOW for instance \ No newline at end of file
diff --git a/spec/compiler/tokenizer.txt b/spec/compiler/tokenizer.txt
deleted file mode 100644
index e69de29..0000000
--- a/spec/compiler/tokenizer.txt
+++ /dev/null
diff --git a/spec/iex/iex-spec.txt b/spec/iex/iex-spec.txt
deleted file mode 100644
index 6979cfc..0000000
--- a/spec/iex/iex-spec.txt
+++ /dev/null
@@ -1,128 +0,0 @@
-This is the IEX file specification.
-
-Document version (semver): 0.0.1
-Main Author: Kyle Gunger
-
-License: Apache 2.0
-
-----------------------------------
-
-Contents:
-
-Organization
-
-----------------------------------
-
-Organization
-
-Magic number starts the file "IEX" or 0x49 0x45 0x58
-
-The header of the file can be represented as such
-
-;struct IEX_HEAD {
- raw {3}char # Always "IEX"
- magic,
-
- {}char
- name,
- arch,
- os,
-
- uint8 # Version info
- major,
- minor,
- patch,
- # OS abi info
- os_major,
- os_minor,
- os_patch,
-
- bool # Tells loader that the file holds a main function
- can_execute,
-
- ~void ({}{}char args) [int]
- # Address of main in file
- start_addr,
-
- {}IEX_SECTION
- sections,
-
- {}IEX_LIB
- dependencies,
-
- IEX_MODULE
- self
-}
-
-a section is defined as
-
-;struct IEX_SECTION {
- {}char
- name,
-
- uint8 # Denotes dependency, symbol table, data, bss, text, etc.
- type,
-
- ~void # Points to start and end of section
- start,
- end
-}
-
-;struct IEX_LIB {
- {}char
- name,
-
- uint8 # Version info
- major,
- minor,
- patch
-}
-
-;struct IEX_MODULE {
- {}char
- name,
-
- {}IEX_FUNCTION
- func,
-
- {}IEX_TYPE
- types,
-
- {}IEX_MODULE
- sub
-}
-
-;struct IEX_FUNCTION {
- {}char
- name,
-
- uint32
- overload,
- bytes_in,
- bytes_out,
-
- ~void
- addr
-}
-
-;struct IEX_TYPE {
- {}char
- name,
-
- bool
- raw_struct,
- interface,
- dynamic,
-
- uint32
- size,
-
- {}IEX_FUNCTION
- methods,
-
- {}IEX_TYPE
- supers,
-
- ~void
- addr
-}
diff --git a/spec/libts.txt b/spec/libts.txt
deleted file mode 100644
index e31d59d..0000000
--- a/spec/libts.txt
+++ /dev/null
@@ -1,93 +0,0 @@
-This is the LIBTS specification, a document related to the definition of the TNSL language,
-meta-language, its usage, and where its standard libraries may be ported.
-
-Document version (semver): 0.0.1
-Main Author: Kyle Gunger
-
-License: Apache 2.0
-
-----------------------------------
-
-Contents:
-
-Part 1 -- About the Library
- 1.1: libts and TNSL
- 1.2: Short overview
-
-Part 2 -- Library Features
- 2.1: Bitwise operations
- 2.2: Standard algorithms
- 2.3: Container structs
- 2.4: Standard abstractions
- 2.5: Time related APIs
- 2.6: Method resolution
-
-----------------------------------
-
-Part 1: About the Library
-
-----------------------------------
-
-Section 1: libts and TNSL
-
- libts or the TNSL standard library is a library to be found on almost all systems containing
- a TNSL implementation. The library contains both common APIs which ease common programming
- feats as well as core program logic for the more high-level features of TNSL. TNSL can
- indeed run without a libts, however doing so limits many parts of the language to the "raw"
- variants. These limited features include structs, types, and interfaces primarily.
-
- libts contains a standard method resolution algorithm such that method calls can be resolved
- on runtime for the high level dynamic type system.
-
-----------------------------------
-
-Section 2: Short overview
-
- The library contains many standard algorithms and abstractions found in other languages such
- as C, as well as helpers for these containers.
-
-
-----------------------------------
-
-Part 2: Library Features
-
-----------------------------------
-
-Section 1: Bitwise Operations
-
-----------------------------------
-
-Section 2: Standard algorithms
-
-----------------------------------
-
-Section 3: Container structs
-
-----------------------------------
-
-Section 4: Standard abstractions
-
-----------------------------------
-
-Section 5: Time related APIs
-
- Time is important for any program, and libts provides an interface through which to query
- the operating system for it. libts also provides conversion, projection, and timing features
- so that programs can make use of user inputs relating to time. If bugs are found in the time projection
- of the library, they will be fixed. No regard to existing data should be given; accuracy is
- the goal above breakage.
-
- That being said, the library should attempt to work around erroneous data in a way that
- provides an alternative date if the data is in a good format but pointing to a non-existent
- date. An error code will still be thrown in these cases, but indicating that the date does
- not exist, not that the input was complete garbage. Programs may catch errors in any way
- they choose.
-
-----------------------------------
-
-Section 6: Method resolution
-
- As previously stated, the library performs method resolution for calls on high-level types.
- In an iex file, this is done by finding the associated IEX_TYPE of the object and following
- along the "supers" array and checking with the "methods" arrays until the IEX_TYPE contains
- a definition for the method called which fits the caller's arguments. \ No newline at end of file
diff --git a/spec/spec.txt b/spec/spec.txt
deleted file mode 100644
index 4086d3c..0000000
--- a/spec/spec.txt
+++ /dev/null
@@ -1,928 +0,0 @@
-This is the TNSL specification, a document related to the definition of the TNSL language,
-meta-language, its usage, and where its compiler may be ported.
-
-Document version (semver): 0.0.1
-Main Author: Kyle Gunger
-
-License: Apache 2.0
-
-----------------------------------
-
-Contents:
-
-Preamble
-
-Part 1 -- The Language
- 1.1: .tnsl Files
- 1.2: Blocks
- 1.3: Statements
- 1.4: Types
- 1.5: Operators
- 1.6: Borrow checker
- 1.7: Anonymous blocks
- 1.8: Raw and Asm
-
-Part 2 -- Related Features
- 2.1: Style guide
- 2.2: The pre-processor
- 2.3: Compiler options
- 2.4: Included tools
-
-Part 3 -- The TNSL Calling ABI
- 3.1:
-
-Part 4 -- Standard libs
- 4.1: Bare-metal
- 4.2: libts
- 4.3: Cross call libc
-
-Appendix A: Reserved Characters and their Uses
-Appendix B: Multi-Character Operators
-
-----------------------------------
-
-Preamble
-
-The past few years have seen an explosion of languages trying to break into
-the space C has held for so long, with low to moderate success. TNSL's
-primary goal is not to "fix" C into some every man's language; the hope is that
-TNSL will be relatively nice to program in for *programmers*, and that this
-quality will be rewarding to learn for those new to the field. TNSL's goal
-is more to demystify some of the hard edges which make C difficult to
-*completely* grasp for new programmers. By making the specification and
-documentation open and free, we hope that new and old programmers can
-contribute and easily experience a nice language, so once a novice goes out
-into the real-world of TNSL, it won't be too far removed from the documentation
-of the language at its core. We also hope that TNSL will be competent enough
-to be loved by veterans of the field as well, and make life slightly easier for
-those weirdos who still love to program without safety.
-
-Though TNSL does not completely resemble those who have inspired it, you will
-find that many features present in languages such as C, Golang, Rust, Java,
-and C++ still live on in these halls.
-
-Welcome to TNSL
-
-
-----------------------------------
-
-Part 1: The Language
-
-----------------------------------
-
-Section 1: .tnsl files
-
-The high-level files containing code in the TNSL language contain the .tnsl extension.
-Each file may contain 0 or more of the following:
-
- 1) Comments
-
- 2) Pre-processor statements
-
- 3) Variable definitions
-
- 4) Named code blocks
-
- 5) Method blocks
-
- 6) Module blocks
-
-Code blocks and method blocks are the only blocks which may contain statements (1.3) and logical blocks.
-
-----------------------------------
-
-Section 2: Blocks
-
-TNSL files will consist primarily of blocks both with user-defined names and language-defined keywords.
-Standard blocks (and their modifiers) are listed here.
-
-Each block in TNSL starts with a forward slash ( / ) followed by the character of the block.
-The block ends with the reverse of these characters, or at special multi-character boundaries.
-
-
- 1.2.1: The Comment Block
- "/#" starts the comment block, and "#/" ends the comment block.
- Any text within the block will be ignored by the pre-processor and compiler.
-
-
- 1.2.2: The Pre-Processor Block
- "/:" starts the pre-processor block, which is usually followed by a pre-processor keyword.
-
- Every line in the pre-processor block acts as if the keyword specified at the start of the block was placed
- at the beginning of the line.
-
- E.g.
-
- /: import
- 'math'
- "physics"
- :/
-
- represents the following two pre-processor statements:
-
- : import 'math'
- : import "physics"
-
- ":/" ends the block.
-
-
- 1.2.3: The Code Block
- "/;" starts the code block.
-
- The code block may be followed immediately by the following qualifiers:
-
- - "loop" to represent a looping block.
-
- - "if" to represent the start of an if chain.
-
- - "if else" to represent the start of a secondary if case.
-
- - "else" to represent the fallback block of an if chain.
-
- - "match" to represent the beginning of a match (or switch) block.
-
- - "case" (inside match only) to represent the start of a case block.
-
- - "default" catchall case
-
- - "method" to represent a set of methods for use on a type.
-
- - "override" to replace an extended method with a new one.
-
- - "operator" (inside method only) to represent an operator overload.
-
- - "interface" to create a set of methods that a struct must implement if it extends the interface.
-
- - "module" to represent a set of related functions, types, methods, and other modules.
- (in other languages this is sometimes referred to as a namespace)
-
- - A non-keyword consisting of unreserved characters to "name" a block as a function or method.
-
-
- If none of the above are put after the start of a block then the block is considered anonymous.
- This is discussed more in 1.7 (Anonymous Blocks)
-
-
- A block may also be followed by parentheses "()" which denote a list of inputs, and/or
- brackets "[]" which denote a list of outputs. These are discussed more in 1.4 (Types).
-
-
- If a loop block is followed by a set of parentheses and the last statement is an expression
- resolving in a boolean expression, this expression is evaluated as the exit condition for the loop.
-
- If a loop block is followed by a set of brackets, each expression is evaluated before the loop
- jumps back to the top (and also before the exit condition is evaluated if one exists).
-
-
- An if or if else block must have a set of parentheses whose final statement is an expression
- resolving in a boolean expression. This expression is evaluated to decide if the branch is taken or not.
- (if the condition is true the branch is taken, if false, the branch is skipped)
-
- A match block must have a set of parentheses whose final statement is an expression which
- resolves in a type whose data is stored on the stack.
-
- Each case statement must have a constant value directly following. This branch of code is taken
- only if the parent match block's expression resolves in a value matching the case block.
-
- A default block may be used to create a catchall case (for when no other case was used).
-
-
- The method block represents a set of functions related to a type, and must be followed by parentheses
- which contain a pointer to that type.
-
- The operator block is immediately qualified by a reserved character representing an operator, then
- followed by a set of parentheses representing a pointer to the type the operator can be used on.
-
- The override block may be used if overriding an extended method.
-
-
- The interface block acts similar to the method block, but any methods actually implemented must only
- reference other methods. May contain "operator" and "override" like the method block. May extend
- other interfaces, but not types or structs.
-
-
- The module keyword must be followed immediately by a name for the module using only un-reserved characters.
-
-
- A named or anonymous block may be followed by parentheses indicating parameters, and/or brackets
- indicating return type(s). This is discussed more in 1.4 and 1.7
-
- ";/" ends the code block.
-
-
- 1.2.4: Block redefinition and shortcuts
- Due to the two-character nature of block beginnings and endings, some simple shortcuts have been devised
- to mitigate annoyance at re-defining or swapping between block types.
-
- ";;" closes a code block and opens a new code block.
-
-
- e.g.
-
- /; if
-
- ;; else
-
- ;/
-
-
- "::" closes a pre-processor block and opens a new one.
-
- ";#" closes a code block and opens a comment block.
-
- "#;" closes a comment block and opens a code block.
-
- ":#" closes a pre-processor block and opens a comment block.
-
- "#:" closes a comment block and opens a pre-processor block.
-
-----------------------------------
-
-1.3: Statements
-
-Statements make up the actual "doing" part of TNSL
-
- 1.3.1: Comments
- Comments start with the "#" character and end at the end of the line.
- The compiler will ignore all characters (even other reserved characters) after the # on that line.
-
- If I may be candid for a moment:
- Comments ( # ) should not matter in a comment block, although this is currently a tokenizer bug resulting in some weird behavior.
-
-
- 1.3.2: Pre-processor statements
- Pre-processor statements almost make up a meta-programming language (not solidified yet)
- which influences how code is interpreted by the compiler.
-
- Pre-processor statements start with a ":" character and end at the next line
-
- The pre-processor is discussed more in 2.2
-
-
- 1.3.3: Code statements
- Code statements begin with the ";" character and end at the next ":", ";", or block.
-
- Code statements come in four forms:
-
- 1) Definition (variable, struct, etc.)
- 2) Expression (assignment, increment, etc.)
- 3) Call (function call, block call, etc.)
- 4) Keyword (using a keyword to perform a task)
-
- These forms can all mix to form the final statement.
-
-
- 1.3.4: Keywords
-
- - "struct" to define a new struct type
-
- - "extends" to inherit methods and members from another struct or interface
-
- - "is" to check if a type extends (or equates) another type
-
- - "continue" to continue through a loop
-
- - "break" to end a loop preemptively
-
- - "label" to mark a point to jump to in the code
-
- - "goto" to jump back or forward to the label
-
- - "const" an unchangeable value
- (used in variable definition)
-
- - "static" a variable that is kept between block calls, even if it falls out of scope
- (used in variable definition)
-
- - "volatile" a value that may change at any time.
- The compiler will not optimize the value even if optimization is enabled.
- (used in variable definition)
-
- - "self" only for use in methods. References the object the method was called on
- without the need for pointer differentiation.
-
- - "super" only for use in methods on structs or interfaces which inherit from other structs/interfaces.
- references the parent struct's methods if overridden.
-
- - "return" stops the current named (or anonymous) code block to give a value to its caller
-
-
- 1.3.5: Expressions
- Expressions represent pieces of code which return values.
- These take several forms:
-
- 1) A literal value
- 2) A call to a function or method which returns a value
- 3) A reference to a variable
- 4) An operator combining two or more of the above
-
- More in depth:
-
- 1) Literal values
-
- Literal numbers start with a decimal number, and can contain up to one non-number element:
- 0 - valid
- 0.2341 - valid
- 120000 - valid
-
- .34567 - invalid
- 0asd...kj - invalid
-
- Special bases:
-
- 0b1001 - valid (binary)
- 0o0127 - valid (octal)
- 0xABCD - valid (hex)
- 0BZZZZ - valid (base 64)
-
- These rules will be amended and are mostly for ease of parsing in the first version of the language.
-
- Literal boolean values are put as such:
- true
- false
-
- Literal string values use "", and \ as an escape character
- "hello, world!" - valid
- "\"" - valid
- "\\" - valid
-
- "\" - invalid
- " - invalid
-
- Literal characters use '' and either an escape character or a single character/code point between them
- ' ' - valid
- '\u2000' - valid
- '\\' - valid
- '\'' - valid
-
- invalid:
- '\u200220202asdasdaw qwe '
- '\\asd asd '
- 'ab'
- '\'
- '
-
- 2) Call with a return
-
- calling a function is as simple as naming a block of code and then calling it with parentheses
-
- # Define
- /; add
- ;/
-
- # Call (not expression)
- ;add()
-
- what makes a call an expression is if it outputs any types
-
- # Define
- /; get_five [int]
- ;return 5
- ;/
-
- # Call (expression)
- ;get_five()
-
-
- 3) A reference to a variable
-
- Fairly straight-forward, an initialized variable on its own is a value, and thus, an expression.
-
- ;int x = 0
-
- ;x
-
-
- 4) Combining two or more of the above
-
- ;x + get_five()
-
-
- 1.3.6: Ways to define variables
-
- When defining variables, first a variable type must be specified, then a variable name.
- Finally, it may be initialized when defined, or not.
-
- #integer type, uninitialized
- ;int x
-
- #integer type, initialized by literal
- ;int y = 42
-
- #integer type, initialized by expression
- ;int z = y + get_five()
-
- ;x = z*2
-
-----------------------------------
-
-1.4: Types
-
-TNSL's type system consists of built-in types, user interfaces, and user structs.
-The built-in types are always on the stack, while user types can be on the stack or the heap.
-
- 1.4.1: Built-in types
- Built-in types are meant to be portable so that tnsl can be backported to whatever system one may want.
-
- There are five levels:
- REQUIRED - If the machine does not have at least this, we will not officially support it
- SHOULD - Consumer computer systems have had this for a while, so a system really should have it
- LIKELY - Many consumer electronics now have this, so it would be likely to be included
- NICE - Future proofing
- UNLIKELY - These are just weird things I thought up in the shower
-
- Where a given arch falls on this spectrum (as well as market permeation) relates to how likely we are to support it.
- It also relates to how standards compliant a device is for our language.
-
-
- Types and their sizes:
-
- smallest (REQUIRED to be standards compliant):
- bool - true or false (smallest addressable space on the system)
-
- size 8 (REQUIRED to be standards compliant):
- achar - represents an ascii value
- int8 - -128 to 127
- uint8 - 0 to 255
-
- variable (depends on system) (REQUIRED):
- generic (void) pointer - "~void" can represent an arbitrary memory address part of heap representation
- pointer (for each supported type) - part of heap representation
-
- size 16 (SHOULD to be standards compliant):
- int16 - 16 bit signed int
- uint16 - 16 bit unsigned int
-
- variable (SHOULD):
- type - a type which represents other types.
- uchar - represents a unicode code point
- {}<type> - an array type
-
- size 32 (SHOULD):
- int32 - 32 bit signed int
- uint32 - 32 bit unsigned int
- float32 - 32 bit floating point (single precision)
-
- size 64 (LIKELY):
- int64 - 64 bit signed int
- uint64 - 64 bit unsigned int
- float64 - 64 bit floating point
-
- vect (NICE):
- vector, simd, etc. not really sure how these work yet. I'll get back to you
-
- size 128 (NICE):
- int128
- uint128
- float128
-
-
- 1.4.2: User defined types (stack)
- Any structs defined not using pointers are automatically allocated on the stack.
-
- Structs are normally aligned to byte boundaries
-
- User defined type ids are always >= 64
-
- Defining a struct can be done as follows:
-
- ;struct <struct name> (<list of inputs (makes this a dynamic type)>) { <list of members (may use inputs)> }
-
- e.g.
-
- ;struct Vector2 {int32 x, y}
-
-
- Creating a variable from the struct can be done in two ways:
-
- # one, use the assignment operator
- ;Vector2 vec = Vector2{0, 1}
-
- # two, use the list syntax
- ;Vector2 vec{0, 1}
-
- # also, feel free to set the members in the list brackets (does not have to be in order)
- ;Vector2 vec{x = 0, y = 1}
-
- # re-assignment must always use the following syntax
- ;vec = <expression returning variable type>
-
-
- 1.4.3: User defined methods
- Any type may be expanded by user defined methods on that type.
-
- method block example using operator
-
- /; method Vector2
-
- /; operator + (~Vector2 v)
- ;self.x += `v.x
- ;self.y += `v.y
- ;/
-
- /; dot (~Vector2 v) [int32]
- ;return self.x * `v.x + self.y * `v.y
- ;/
-
- ;/
-
- said methods may then be called like so
-
- /; some_method
- ;Vector2 vec1 {0, 2}
- ;Vector2 vec2 {1, 4}
-
- # Call
- ;vec1.dot(~vec2) # represents 8
- ;/
-
-
- 1.4.4: Interfaces
- Interfaces exist as a block of mostly un-implemented methods which
- can be implemented by other interfaces, but ultimately, structs.
-
- Interfaces are types in the sense that objects can be derived from them,
- and type equivalence is possible, but there are never any pure instances
- of interfaces.
-
- To define an interface, create a block with some methods.
- Methods not marked with "override" are considered implemented by the interface,
- and structs do not have to override them.
-
- /;interface Vector
-
- /; override length_sq [int32] ;/
-
- /; override dimension [int8] ;/
-
- ;/
-
- Interfaces can be used by the extends keyword, just like structs.
-
- ; struct Vector2 extends Vector
- {
- x, y int32
- }
-
- # Now, not implementing will throw an error on compile
- /; method ~Vector2
-
- /; override length_sq [int32]
- ;return self.x*self.x + self.y*self.y
- ;/
-
- /; override dimension [int32]
- ;return 2
- ;/
- ;/
-
-----------------------------------
-
-1.5: Operators
-
- 1.5.1: List of operators
- At the moment, read Appendix A and Appendix B for a list of operators.
-
- 1.5.2: List of reserved characters
- ; : ' " , . < > ~ ` ! # % ^ & * ( ) { } [ ] - = +
-
-----------------------------------
-
-1.6: Borrow checker
-
-IDK man, maybe later
-
-----------------------------------
-
-1.7: Anonymous blocks
-
-This chapter covers anonymous code blocks, where they can be used,
-and how functions are first-class in TNSL
-
- 1.7.1: Anonymous
- In TNSL, like many other languages, we have closures (or lambda expressions if you prefer).
- They have the same type as code blocks (which we haven't really talked about yet),
- and can be passed around as variables as well as called.
-
- The type that functions take on depends on their return value.
-
- Block variables can be written as
-
- ;void(inputs)[outputs] block
-
- i.e.
-
- ;void(int32)[int32] block
- # represents a block which returns a int32 and takes a int32 as a parameter
-
- Anonymous blocks can be written only as scope, or with inputs and outputs for function calls.
-
- /; call_func (void(int32)[int32] to_call) [int32]
- ;return to_call(5)
- ;/
-
- /; provide_anon () [int32]
- ;return call_func(/; (int32 a) [int32]
- ;return a + 1
- ;/)
- ;/
-
- In fact, all functions are special types of expressions which return themselves (a set of other statements).
-
-----------------------------------
-
-1.8: Raw and Asm
-
-Sometimes, the programmer needs to implement an exact or unique set of instructions
-as lines of assembly to achieve their goal. Raw and Asm allow for that.
-
- 1.8.1: raw
- The raw keyword tells the compiler to leave the entire block (and its contents)
- to the programmer.
-
- When specifying a raw block, the compiler disables the borrow checker on the block and
- everything inside it. It also strips any calling convention from the block if it is a function,
- reducing the open and close of the block to a simple call and ret (or equiv).
-
- If the block is inline, it does even less. Simply carrying forward the instructions passed inside of the block.
-
- Note that any memory allocated inside the block will not be automatically de-allocated as is normal for variables,
- so programmers must take care to make sure there are no memory leaks in the code. Thus, all memory leaks can be
- traced to a raw block. Any and every block may be raw. This includes the main function.
-
- 1.8.2: asm
- The asm keyword may only be used in blocks marked as raw.
-
- not all the kinks are worked out yet, but the gist is this example:
-
- ;asm "<string of assembly>"
-
- you can also use variables in it as long as they are in scope.
-
- ;asm "mov ax, [some_var]"
-
- This paired with knowing the calling convention (i.e. where TNSL stores variables before and after a function call)
- allows you complete control of the code and execution order if you so wish.
-
-
-----------------------------------
-
-Part 2: Related Features
-
-2.1: Style guide
- As a basic rule, users of the language *should* be using the following style guide when
- writing TNSL programming. Some of the following definitions are arbitrary, but style guides
- are more for consistency than code quality.
-
- If working on a large project, a differing style guide may be written to the compliment of the
- programmers working on the specific project.
-
- The style guide is not meant as a way to keep people from programming in TNSL, and is not enforceable,
- but should be followed nonetheless for no other reason than consistency in the code base.
-
- All TNSL specifications are **heavily** encouraged to follow the style guide for ease of reading.
-
-
- 2.1.1: Comments
- Minimal comments in the code itself unless a particular implementation is fairly obtuse.
-
- Doc comments for functions should explain what the function is/does, not how it does it.
-
- Doc comment blocks should start with "/##" and end at the function or method written with "#;"
-
- e.g.
-
- /## main is the entry point for the program
-
- #; main
-
- ;/
-
-
- 2.1.2: Variable names
- Variable names should be as descriptive as they need.
- It is encouraged to make parameters more descriptive so others can see
- what they might need to call a function.
-
- Otherwise single letters, snake_case, and other common cases are just fine
- as long as it is fairly clear what is going on.
-
- constants should be in UPPER_CASE
-
- 2.1.3: Function and module names
- It is recommended to use snake_case
-
-----------------------------------
-
-2.2: The pre-processor
-
-
-----------------------------------
-
-Part 3: The TNSL Calling ABI
-
-Honestly I'm not that versed in assembly, I need to read up >_<
-
-
-----------------------------------
-
-Part 4: Standard libs
-
-4.1: Bare-metal
-
-
-4.2: libts
- libts is an effort to create a TNSL standard library and is too large for this document.
- Please read related spec text documents. *(discussed more in libts.txt)
-
-
-4.3: Cross Call libc
-
-
-----------------------------------
-
-Appendix A: Reserved Characters and their Uses
-
-( - Starting condition/expression mark open
-
-) - Starting condition/expression mark close
-
-[ - Ending condition/expression mark open
-
-] - Ending condition/expression mark close
-
-{ - Array/Set mark open
-
-} - Array/Set mark close
-
-: - Pre-processor Statement/Directive
-
-; - Code Statement
-
-# - Comment Statement
-
-, - Separates arguments or in-line statements
-
-= - Assignment operator
-
-. - Get operator (from struct or module)
-
-& - Bit-wise and operator
-
-| - Bit-wise or operator
-
-^ - Bit-wise xor operator
-
-> - Greater than boolean operator
-
-< - Less than boolean operator
-
-! - Not prefix for boolean expression
-
-+ - Addition/concat operator
-
-- - Subtraction operator
-
-* - Multiplication operator
-
-/ - Division operator
-
-% - Modulo operator
-
-~ - Address of (and define pointer type) operator
-
-` - Pointer de-reference operator
-
-
-----------------------------------
-
-Appendix B: Multi-Character Operators
-
-/; - Code block mark open
-
-;/ - Code block mark close
-
-/: - Pre-processor block mark open
-
-:/ - Pre-processor block mark close
-
-/# - Comment block mark open
-
-#/ - Comment block mark close
-
-;; - Redefine code block (acts as a shortcut for ;//;)
-
-:: - Redefine Pre-processor block (acts as shortcut for ://:)
-
-;# - Switch from code block to comment block (;//#)
-
-:# - Shortcut (://#)
-
-#; - Shortcut (#//;)
-
-#: - Shortcut (#//:)
-
-== - Boolean equals
-
-&& - Boolean and
-
-|| - Boolean or
-
-<< - Bit-wise l-shift
-
->> - Bit-wise r-shift
-
-++ - Increment
-
--- - Decrement
-
-
-Augmented assignment operators (a = a <op> b) = (a <op>= b)
-&=
-
-|=
-
-^=
-
-+=
-
--=
-
-*=
-
-/=
-
-%=
-
-~=
-
-`=
-
-
-Augmented boolean operators (a !<op> b) = !(a <op> b)
-!& - NAND
-
-!| - NOR
-
-!^ - XNOR
-
-!== - Boolean not equals
-
-!&&
-
-!||
-
-!>
-
-!<
-
->== - Same as !<
-
-<== - Same as !>
-
-----------------------------------
-
-Appendix C: Memory control (and speed) with each type of struct
-
-Each type of user-definable type or struct or interface grants
-its own level of memory control. These (and their ramifications) are
-listed here from low to high.
-
----
-
-High level, low control structs (dynamic structs) are created when using
-the parameters for structs/types. They allow variable length which can
-house different information at the cost of speed, memory, and control.
-
-These are the only type of structs which can house other dynamic structs.
-Dynamic structs can only be passed by reference due to undefined size at
-compilation.
-
----
-
-Medium level, medium control structs (type structs) are created normally
-through the struct keyword without parameters. These structs are fixed
-length, but the compiler encodes extra info into them. This means they
-get method resolution and override checks which may reduce speed of the
-application.
-
----
-
-Low level, high control structs (raw structs) are created using the "raw"
-keyword before the "struct" keyword. There are no frills, and method
-resolution is not performed for these structs. These structs may not
-extend or be extended. They may, however, implement interfaces. They
-perform as a "what you see is what you get" kind of memory model. They
-may not use parameters, and all internal types must be consistent length
-(no dynamic structs or dynamic type identifiers).
-
----
-
-To summarize:
-All these structs can encode the same info, but as you get lower to
-the system, you get the bonus of speed and control while losing higher
-level functions provided by the language.
-
-This shouldn't matter much to most programmers unless they are doing
-embedded development, systems programming, or firmware programming,
-but it is still a consideration to make for time-sensitive applications. \ No newline at end of file
diff --git a/src/tparse/tree-preproc.go b/src/tparse/tree-preproc.go
new file mode 100644
index 0000000..d0f6637
--- /dev/null
+++ b/src/tparse/tree-preproc.go
@@ -0,0 +1,49 @@
+/*
+ Copyright 2020 Kyle Gunger
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package tparse
+
+// parsePreBlock collects a block-style pre-processor section into one node.
+//
+// The token at tok supplies the node's text (stored under numeric token type
+// 11, the same literal parsePre uses). Every following token before max is
+// appended as a child until one whose Data is ":/" is met.
+//
+// Returns the block node and the index of the ":/" token, or max when no
+// closing token was found in range.
+func parsePreBlock(tokens *[]Token, tok, max int) (Node, int) {
+	block := Node{
+		Data:    Token{Type: 11, Data: (*tokens)[tok].Data},
+		IsBlock: true,
+	}
+
+	// Step past the opening token, then gather children up to the terminator.
+	for tok++; tok < max; tok++ {
+		cur := (*tokens)[tok]
+		if cur.Data == ":/" {
+			break
+		}
+		block.Sub = append(block.Sub, Node{Data: cur, IsBlock: false})
+	}
+
+	return block, tok
+}
+
+// parsePre parses a single-line pre-processor directive.
+//
+// The token at tok names the directive (stored under numeric token type 11,
+// the same literal parsePreBlock uses) and the token after it becomes the
+// node's only child.
+//
+// Returns the directive node and the index of the last token consumed. When
+// no token follows the directive before max, the problem is reported through
+// errOut and the node is returned without a child.
+func parsePre(tokens *[]Token, tok, max int) (Node, int) {
+	out := Node{IsBlock: false}
+	out.Data = Token{Type: 11, Data: (*tokens)[tok].Data}
+
+	// Honour max before reading the argument: indexing tok+1 blindly panics
+	// with "index out of range" when the directive is the final token.
+	// NOTE(review): assumes every single-line directive takes one argument,
+	// which is what the unconditional read implied — confirm.
+	if tok+1 >= max {
+		errOut("Expected an argument after pre-processor directive", (*tokens)[tok])
+		return out, tok
+	}
+
+	tok++
+	out.Sub = append(out.Sub, Node{Data: (*tokens)[tok], IsBlock: false})
+
+	return out, tok
+} \ No newline at end of file
diff --git a/src/tparse/tree-value.go b/src/tparse/tree-value.go
index d03383e..7880a8b 100644
--- a/src/tparse/tree-value.go
+++ b/src/tparse/tree-value.go
@@ -19,18 +19,20 @@ package tparse
// Ops order in TNSL
// Cast/Paren > Address > Get > Inc/Dec > Math > Bitwise > Logic
-var ORDER = map[string]int{
- // Address of
+var UNARY = map[string]int { // Operators that attach to a single value, mapped to a level in the ops order noted above.
"~": 0,
- // De-ref
"`": 0,
+ "++": 2, // Increment; shares level 2 with decrement.
+ "--": 2, // Decrement.
+ "!": 6, // Not; same level (6) as ORDER's "!|" and "!^".
+}
+
+var ORDER = map[string]int{
// Get
".": 1,
- // Inc/Dec
- "++": 2,
- "--": 2,
+ "is": 2,
// Multiplication
"*": 3,
@@ -61,9 +63,6 @@ var ORDER = map[string]int{
"!|": 6,
"!^": 6,
- // Not (prefix any bool or bitwise)
- "!": 6,
-
// Boolean and
"&&": 7,
// Boolean or
@@ -82,23 +81,133 @@ var ORDER = map[string]int{
"!>": 7,
"!<": 7,
+
+ // Assignment
+ "=": 8,
+}
+
+// parseUnaryOps builds a "value" node (numeric token type 10) from
+// tokens[tok:max). Children are appended to Sub in source order: any leading
+// operators found in UNARY, the identifier or literal itself, then trailing
+// UNARY operators, type casts and index expressions.
+//
+// tokens is the whole token stream, tok the first index to read and max one
+// past the last. Malformed input is reported through errOut.
+//
+// Works? Please test.
+func parseUnaryOps(tokens *[]Token, tok, max int) Node {
+	out := Node{Data: Token{Type: 10, Data: "value"}, IsBlock: false}
+	val := false
+
+	// Pre-value op scan: ends right after the first identifier or literal.
+	for ; tok < max && !val; tok++ {
+		t := (*tokens)[tok]
+		switch t.Type {
+		case DEFWORD, LITERAL:
+			out.Sub = append(out.Sub, Node{Data: t, IsBlock: false})
+			val = true
+		case AUGMENT:
+			if _, prs := UNARY[t.Data]; !prs {
+				errOut("Parser bug! Operator failed to load into AST.", t)
+			} else {
+				out.Sub = append(out.Sub, Node{Data: t, IsBlock: false})
+			}
+		default:
+			errOut("Unexpected token in value declaration", t)
+		}
+	}
+
+	// Sanity check: make sure there's actually a value here
+	if !val {
+		// max can equal len(*tokens), so indexing it directly would panic
+		// instead of reporting the error. Prefer the token at max (the one
+		// that ended the range) and fall back to the last token available.
+		var at Token
+		if n := len(*tokens); max >= 0 && max < n {
+			at = (*tokens)[max]
+		} else if n > 0 {
+			at = (*tokens)[n-1]
+		}
+		errOut("Expected to find value, but there wasn't one", at)
+	}
+
+	// Post-value op scan
+	for ; tok < max; tok++ {
+		t := (*tokens)[tok]
+		switch t.Type {
+		case DELIMIT:
+			var tmp Node
+			switch t.Data {
+			case "(": // Function call
+				//TODO: parse list of values here
+			case "[": // Typecasting
+				tmp, tok = parseType(tokens, tok, max, false)
+				out.Sub = append(out.Sub, tmp)
+			case "{": // Array indexing
+				// The index expression starts after the opening brace.
+				tmp = Node{Data: Token{Type: 10, Data: "index"}}
+				var tmp2 Node
+				tmp2, tok = parseValue(tokens, tok+1, max)
+				tmp.Sub = append(tmp.Sub, tmp2)
+				out.Sub = append(out.Sub, tmp)
+			default:
+				errOut("Unexpected delimiter when parsing value", t)
+			}
+		case AUGMENT:
+			if _, prs := UNARY[t.Data]; !prs {
+				errOut("Parser bug! Operator failed to load into AST.", t)
+			} else {
+				out.Sub = append(out.Sub, Node{Data: t, IsBlock: false})
+			}
+		default:
+			errOut("Unexpected token in value declaration", t)
+		}
+	}
+
+	return out
+}
+
+// Works? Please test.
+func parseBinaryOp(tokens *[]Token, tok, max int) (Node) {
+ out := Node{IsBlock: false}
+ first := tok
+ var high, highOrder, bincount int = first, 8, 0
+
+ // Find first high-order op
+ for ; tok < max; tok++ {
+ t := (*tokens)[tok]
+ if t.Type == AUGMENT {
+ order, prs := ORDER[t.Data]
+ if !prs {
+ continue
+ } else if order > highOrder {
+ high, highOrder = tok, order
+ }
+ // TODO: Add in case for the "is" operator
+ bincount++
+ }
+ }
+
+ out.Data = (*tokens)[high]
+
+ if bincount == 0 {
+ // No binops means we have a value to parse. Parse all unary ops around it.
+ return parseUnaryOps(tokens, first, max)
+ } else {
+ // Recursive split to lower order operations
+ out.Sub = append(out.Sub, parseBinaryOp(tokens, first, high))
+ out.Sub = append(out.Sub, parseBinaryOp(tokens, high + 1, max))
+ }
+
+ return out
}
+// parseValue hands the scanned token range to parseBinaryOp and returns the resulting node with the index where scanning stopped. TODO: fix this (no switch case has a body yet, so the scan never stops early).
func parseValue(tokens *[]Token, tok, max int) (Node, int) {
- out := Node{}
+ first := tok // remembered so the whole scanned range can be passed to parseBinaryOp
+
for ; tok < max; tok++ {
t := (*tokens)[tok]
switch t.Type {
- case LITERAL:
- case DEFWORD:
+ case LINESEP: // placeholder: no handling yet
+ case INLNSEP: // placeholder: no handling yet
case DELIMIT:
+ case AUGMENT: // placeholder: no handling yet
+ case LITERAL: // placeholder: no handling yet
}
}
- return out, tok
+ return parseBinaryOp(tokens, first, tok), tok // tok is max here unless it already started at or past max
}
+// TODO: make sure this actually works
func parseVoidType(tokens *[]Token, tok, max int) (Node, int) {
out := Node{}
working := &out
@@ -148,8 +257,9 @@ func parseVoidType(tokens *[]Token, tok, max int) (Node, int) {
return out, tok
}
+// TODO: make sure this actually works
func parseType(tokens *[]Token, tok, max int, param bool) (Node, int) {
- out := Node{}
+ out := Node{Data: Token{Type: 10, Data: "type"}}
working := &out
for ; tok < max; tok++ {
diff --git a/src/tparse/tree.go b/src/tparse/tree.go
index 08c94bf..af3e184 100644
--- a/src/tparse/tree.go
+++ b/src/tparse/tree.go
@@ -44,9 +44,9 @@ func MakeTree(tokens *[]Token, file string) Node {
case ";":
tmp, tok = parseStatement(tokens, tok, max)
case "/:":
- tmp = Node{}
+ tmp, tok = parsePreBlock(tokens, tok + 1, max)
case ":":
- tmp = Node{}
+ tmp, tok = parsePre(tokens, tok + 1, max)
default:
errOut("Unexpected token in file root", t)
}
diff --git a/src/tparse/type.go b/src/tparse/type.go
index b860f02..7113c0b 100644
--- a/src/tparse/type.go
+++ b/src/tparse/type.go
@@ -56,6 +56,7 @@ var PREWORDS = []string{
"else",
"abi",
//"mark",
+ "using",
}
func checkPreWord(s string) int {
@@ -92,7 +93,7 @@ var RESWORD = map[string]int{
"struct": KEYWORD,
"interface": KEYWORD,
"enum": KEYWORD,
- "is": KEYWORD,
+ "is": AUGMENT,
"extends": KEYWORD,
"loop": KEYWORD,
@@ -126,6 +127,8 @@ var RESWORD = map[string]int{
"true": LITERAL,
"false": LITERAL,
+ "alloc": KEYWORD,
+ "calloc": KEYWORD,
"delete": KEYWORD,
"module": KEYWORD,