diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rwxr-xr-x[-rw-r--r--] | gobuild.sh | 4 | ||||
-rw-r--r-- | small-tests/examp.tnsl | 14 | ||||
-rw-r--r-- | spec/compiler/compiler.txt | 23 | ||||
-rw-r--r-- | spec/compiler/pre-processor.txt | 9 | ||||
-rw-r--r-- | spec/compiler/tokenizer.txt | 0 | ||||
-rw-r--r-- | spec/iex/iex-spec.txt | 128 | ||||
-rw-r--r-- | spec/libts.txt | 93 | ||||
-rw-r--r-- | spec/spec.txt | 928 | ||||
-rw-r--r-- | src/tparse/tree-preproc.go | 49 | ||||
-rw-r--r-- | src/tparse/tree-value.go | 138 | ||||
-rw-r--r-- | src/tparse/tree.go | 4 | ||||
-rw-r--r-- | src/tparse/type.go | 5 |
13 files changed, 189 insertions, 1208 deletions
@@ -2,4 +2,4 @@ build/ .vscode/ *-test.tnp -*-test.tnt
\ No newline at end of file +*.tnt
\ No newline at end of file diff --git a/gobuild.sh b/gobuild.sh index daa0a55..e85b718 100644..100755 --- a/gobuild.sh +++ b/gobuild.sh @@ -2,7 +2,7 @@ SRCDIR=$(pwd) -GOPATH="$GOPATH:$SRCDIR" -GO111MODULE=off +export GOPATH="$GOPATH:$SRCDIR" +export GO111MODULE="off" go build -o build/${1} src/${1}.go diff --git a/small-tests/examp.tnsl b/small-tests/examp.tnsl index 0c32f3f..196db40 100644 --- a/small-tests/examp.tnsl +++ b/small-tests/examp.tnsl @@ -1,6 +1,6 @@ #Comment like this -/## +/# Or like this (blocks begin with /<symbol> and end with <symbol>/) Block Comment #/ @@ -9,8 +9,8 @@ #/ # Preprocessor directives are like this -# Import from library using ' -:import 'what/what.tnsl' a +# Import an external module from library using ' +:import 'what' # Import from local file using " :import "what/what.tnsl" @@ -65,11 +65,11 @@ -# The struct keyword is followed by [name] {values} -;struct [s1] {string Name, string Message = "Default message (c-style strings)"} +# The struct keyword is followed by <name> {values} +;struct s1 {string Name, string Message = "Default message (c-style strings)"} # Most people should declare as such: -;struct [s1] { +;struct s1 { string Name, string Message = "Default message (c-style strings)" } @@ -156,7 +156,7 @@ # Dumb generic type struct -; struct [gen] (type T) { +; struct gen (type T) { T i } diff --git a/spec/compiler/compiler.txt b/spec/compiler/compiler.txt deleted file mode 100644 index df7a591..0000000 --- a/spec/compiler/compiler.txt +++ /dev/null @@ -1,23 +0,0 @@ -The "compiler" of the TNSL language is formed of several parts, each discussed -in their own file. - - 1) Parsing - - 1a) The tokenizer - chews up .tnsl files and spits out tokens - 1b) The file resolver - interperets import commands and creates a tree of files to compile - 1c) The definer - - 1d) The AST generator - indexes tokens and provides some rudimentary syntax checking - - 2) Validation - - 2a) The expander - expands pre-processor commands and macro operations - 2b) The validator - checks all requirements for all functions and expressions are valid - - 3) Translation - - 3a) Variable resolver - chooses and creates an internal model of where variables are on the stack, as well as calling conventions for functions - 3b) Type translator - places and translates types (internal and external) - 3c) Container resolver - checks container (elf, exe, dll, app, out, etc.) and decides how to map the types for future linking - 3d) Final translation - translates all logic and expressions into native binary code - 3e) Optimization - 3f) Container Writer - writes the final output of the previous steps into the specified container diff --git a/spec/compiler/pre-processor.txt b/spec/compiler/pre-processor.txt deleted file mode 100644 index 8297e7d..0000000 --- a/spec/compiler/pre-processor.txt +++ /dev/null @@ -1,9 +0,0 @@ -A set of components in the compiler (all before the translator) which expand, -resolve, and validate pre-processor directives into code which can be compiled - -Consists of the resolver in the parsor. -The expander and part of the indexer in the validator. - -Special (compiler specififc) definitions can be added with _<compiler>_<variable name> - -E.X. if the official implementation wanted to provide the exact time of the compilation, it could use _TNSLC_TIME_NOW for instance
\ No newline at end of file diff --git a/spec/compiler/tokenizer.txt b/spec/compiler/tokenizer.txt deleted file mode 100644 index e69de29..0000000 --- a/spec/compiler/tokenizer.txt +++ /dev/null diff --git a/spec/iex/iex-spec.txt b/spec/iex/iex-spec.txt deleted file mode 100644 index 6979cfc..0000000 --- a/spec/iex/iex-spec.txt +++ /dev/null @@ -1,128 +0,0 @@ -This is the IEX file specification. - -Document version (semver): 0.0.1 -Main Author: Kyle Gunger - -License: Apache 2.0 - ----------------------------------- - -Contents: - -Organization - ----------------------------------- - -Organization - -Magic number starts the file "IEX" or 0x49 0x45 0x58 - -The header of the file can be represented as such - -;struct IEX_HEAD { - raw {3}char # Always "IEX" - magic, - - {}char - name, - arch, - os, - - uint8 # Version info - major, - minor, - patch, - # OS abi info - os_major, - os_minor, - os_patch, - - bool # Tells loader that the file holds a main function - can_execute, - - ~void ({}{}char args) [int] - # Address of main in file - start_addr, - - {}IEX_SECTION - sections, - - {}IEX_LIB - dependencies, - - IEX_MODULE - self -} - -a section is defined as - -;struct IEX_SECTION { - {}char - name, - - uint8 # Denotes dependency, symbol table, data, bss, text, etc. - type, - - ~void # Points to start and end of section - start, - end -} - -;struct IEX_LIB { - {}char - name, - - uint8 # Version info - major, - minor, - patch -} - -;struct IEX_MODULE { - {}char - name, - - {}IEX_FUNCTION - func, - - {}IEX_TYPE - types, - - {}IEX_MODULE - sub -} - -;struct IEX_FUNCTION { - {}char - name, - - uint32 - overload, - bytes_in, - bytes_out, - - ~void - addr -} - -;struct IEX_TYPE { - {}char - name, - - bool - raw_struct, - interface, - dynamic, - - uint32 - size, - - {}IEX_FUNCTION - methods, - - {}IEX_TYPE - supers, - - ~void - addr -} diff --git a/spec/libts.txt b/spec/libts.txt deleted file mode 100644 index e31d59d..0000000 --- a/spec/libts.txt +++ /dev/null @@ -1,93 +0,0 @@ -This is the LIBTS specification, a document related to the definition of the TNSL language, -meta-language, it's usage, and where it's standard libraries may be ported. - -Document version (semver): 0.0.1 -Main Author: Kyle Gunger - -License: Apache 2.0 - ----------------------------------- - -Contents: - -Part 1 -- About the Library - 1.1: libts and TNSL - 1.2: Short overview - -Part 2 -- Library Features - 2.1: Bitwise operations - 2.2: Standard algorithms - 2.3: Continer structs - 2.4: Standard abstractions - 2.5: Time related APIs - 2.6: Method resolution - ----------------------------------- - -Part 1: About the Library - ----------------------------------- - -Section 1: libts and TNSL - - libts or the TNSL standard library is a library to be found on almost all systems containing - a TNSL implimentation. The library contains both common APIs which ease common programming - feats as well as core program logic for the more high-level features of TNSL. TNSL can - indeed run without a libts, however doing so limits many parts of the language to the "raw" - variants. These limited features include structs, types, and interfaces primarially. - - libts contains a standard method resolution algorithm such that method calls can be resolved - on runtime for the high level dynamic type system. - ----------------------------------- - -Section 2: Short overview - - The library contains many standard algoritims and abstractions found in other languages such - as C, as well as helpers for these containers. - - ----------------------------------- - -Part 2: Library Features - ----------------------------------- - -Section 1: Bitwise Operations - ----------------------------------- - -Section 2: Standard algorithms - ----------------------------------- - -Section 3: Continer structs - ----------------------------------- - -Section 4: Standard abstractions - ----------------------------------- - -Section 5: Time related APIs - - Time is important for any program, and libts provides an interface through which to query - the operating system for it. libts also provides conversion, projection, and timing features - so that programs can make use of user inputs relating to time. If bugs in the time projection - of the library, they will be fixed. No reguard to existing data should be given, accuracy is - the goal above breakage. - - That being said, the library should attempt to work around erronious data in a way that - provides an alternative date if the data is in a good format but pointing to a non-existant - date. An error code will still be thrown in these cases, but indicating that the date does - not exist, not that the input was complete garbage. Programs may catch errors in any way - they choose. - ----------------------------------- - -Section 6: Method resolution - - As previously stated, the library performs method resolution for calls on high-level types. - In an iex file, this is done by finding the associated IEX_TYPE of the object and following - along the "supers" array and checking with the "methods" arrays until the IEX_TYPE contains - a definition for the method called which fits the caller's arguments.
\ No newline at end of file diff --git a/spec/spec.txt b/spec/spec.txt deleted file mode 100644 index 4086d3c..0000000 --- a/spec/spec.txt +++ /dev/null @@ -1,928 +0,0 @@ -This is the TNSL specification, a document related to the definition of the TNSL language, -meta-language, it's usage, and where it's compiler may be ported. - -Document version (semver): 0.0.1 -Main Author: Kyle Gunger - -License: Apache 2.0 - ----------------------------------- - -Contents: - -Preamble - -Part 1 -- The Language - 1.1: .tnsl Files - 1.2: Blocks - 1.3: Statements - 1.4: Types - 1.5: Operators - 1.6: Borrow checker - 1.7: Anonymous blocks - 1.8: Raw and Asm - -Part 2 -- Related Features - 2.1: Style guide - 2.2: The pre-processor - 2.3: Compiler options - 2.4: Included tools - -Part 3 -- The TNSL Calling ABI - 3.1: - -Part 4 -- Standard libs - 4.1: Bare-metal - 4.2: libts - 4.3: Cross call libc - -Appendix A: Reserved Characters and their Uses -Appendix B: Multi-Character Operators - ----------------------------------- - -Preamble - -The past few years have seen an explosion of languages trying to break into -the space C has held for so long, with low to moderate success. TNSL's -primary goal is not "fix" C into some every man's language, the hope is that -TNSL will be relatively nice to program in for *programmers*, and that this -quality will be rewarding to learn for those new to the field. TNSL's goal -is more to de-mistify some of the hard edges which make C difficult to -*completely* grasp for new programmers. By making the specification and -documentation open and free, we hope that new and old programmers can -contribute and easily experience a nice language, so once a novice goes out -into the real-world of TNSL, it won't be too far removed from the documentation -of the language at its core. We also hope that TNSL will be compitant enough -to be loved by veterans of the field as well, and make life slightly easier for -those wierdos who still love to program without safty. - -Though TNSL does not completely resemble those who have inspired it, you will -find that many features present in languages such as C, Golang, Rust, Java, -and C++ still live on in these halls. - -Welcome to TNSL - - ----------------------------------- - -Part 1: The Language - ----------------------------------- - -Section 1: .tnsl files - -The high-level files containing code in the TNSL language contain the .tnsl extension. -Each file may contain 0 or more of the following: - - 1) Comments - - 2) Pre-processor statements - - 3) Variable definitions - - 4) Named code blocks - - 5) Method blocks - - 6) Module blocks - -Code blocks and method blocks are the only blocks which may contain statements (1.3) and logical blocks. - ----------------------------------- - -Section 2: Blocks - -TNSL files will consist primarily of blocks both with user-defined names and language-defined keywords. -Standard blocks (and their modifiers) are listed here. - -Each block in TNSL starts with a forward slash ( / ) followed by the character of the block. -The block ends with the reverse of these characters, or at special multi-character boundaries. - - - 1.2.1: The Comment Block - "/#" starts the comment block, and "#/" ends the comment block. - Any text within the block will be ignored by the pre-processor and compiler. - - - 1.2.2: The Pre-Processor Block - "/:" starts the pre-processor block, which is usually followed by a pre-processor keyword. - - Every line in the pre-processor block acts as if the keyword specified at the start of the block was placed - at the beginning of the line. - - E.g. - - /: import - 'math' - "physics" - :/ - - represents the following two pre-processor statements: - - : import 'math' - : import "physics" - - ":/" ends the block. - - - 1.2.3: The Code Block - "/;" starts the code block. - - The code block may be followed immediately by the following qualifiers: - - - "loop" to represent a looping block. - - - "if" to represent the start of an if chain. - - - "if else" to represent the start of a secondary if case. - - - "else" to represent the fallback block of an if chain. - - - "match" to represent the beginning of a match (or switch) block. - - - "case" (inside match only) to represent the start of a case block. - - - "default" catchall case - - - "method" to represent a set of methods for use on a type. - - - "override" to replace an extended method with a new one. - - - "operator" (inside method only) to represent an operator overload. - - - "interface" to create a set of methods that a struct must impliment if it extends the interface. - - - "module" to represent a set of related functions, types, methods, and other modules. - (in other languages this is sometimes referred to as a namespace) - - - A non-keyword consisting of unreserved characters to "name" a block as a function or method. - - - If none of the above are put after the start of a block then the block is considered anonymous. - This is discussed more in 1.8 (Anonymous Blocks) - - - A block may also be followed by parentheses "()" which denote a list of inputs, and/or - brackets "[]" which denote a list of outputs. These are discussed more in 1.5 (Types). - - - If a loop block is followed by a set of parentheses and the last statement is an expression - resolving in a boolean expression, this expression is evaluated as the exit condition for the loop. - - If a loop block is followed by a set of brackets, each expression is evaluated before the loop - jumps back to the top (and also before the exit condition is evaluated if one exists). - - - An if or if else block must have a set of parentheses whose final statement is an expression - resolving in a boolean expression. This expression evaluated to decide if the branch is taken or not. - (if the condition is true the branch is taken, if false, the branch is skipped) - - A match block must have a set of parentheses whose final statement is an expression which - resolves in a type whose data is stored on the stack. - - Each case statement must have a constant value directly following. This branch of code is taken - only if the parent match block's expression resolves in a value matching the case block. - - Default block may be used to create a catchall case (for if no other) case was used. - - - The method block represents a set of functions related to a type, and must be followed by parentheses - which contain a pointer to that type. - - The operator block is immediately qualified by a reserved character representing an operator, then - followed by a set of parentheses representing a pointer to the type the operator can be used on. - - The override block may be used if overriding an extended method. - - - The interface block acts similar to the method block, but any methods actually implimented must only - reference other methods. May contain "operator" and "override" like the method block. May extended - other interfaces, but not types or structs. - - - The module keyword must be followed immediately by a name for the module using only un-reserved characters. - - - A named or anonymous block may be followed by parentheses indicating parameters, and/or brackets - indicating return type(s). This is discussed more in 1.5 and 1.8 - - ";/" ends the code block. - - - 1.2.4: Block redefinition and shortcuts - Due to the two-character nature of block beginnings and endings, some simple shortcuts have been devised - to mitigate annoyance at re-defining or swapping between block types. - - ";;" closes a code block and opens a new code block. - - - e.g. - - /; if - - ;; else - - ;/ - - - "::" closes a pre-processor block and opens a new one. - - ";#" closes a code block and opens a comment block. - - "#;" closes a comment block and opens a code block. - - ":#" closes a pre-processor block and opens a comment block. - - "#:" closes a comment block and opens a pre-processor block. - ----------------------------------- - -1.3: Statements - -Statements make up the actual "doing" part of TNSL - - 1.3.1: Comments - Comments start with the "#" character and end at the end of the line. - The compiler will ignore all characters (even other reserved characters) after the # on that line. - - If I may be candid for a moment: - Comments ( # ) should not matter in a comment block, although this is currently a tokenizer bug resulting in some weird behavior. - - - 1.3.2: Pre-processor statements - Pre-processor statements almost make up a meta-programming language (not solidified yet) - which influences how code is interpreted by the compiler. - - Pre-processor statements start with a ":" character and end at the next line - - The pre-processor is discussed more in 2.2 - - - 1.3.3: Code statements - Code statements begin with the ";" character and end at the next ":", ";", or block. - - Code statements come in four forms: - - 1) Definition (variable, struct, etc.) - 2) Expression (assignment, increment, etc.) - 3) Call (function call, block call, etc.) - 4) Keyword (using a keyword to perform a task) - - These forms can all mix to form the final statement. - - - 1.3.4: Keywords - - - "struct" to define a new struct type - - - "extends" to inherit methods and members from another struct or interface - - - "is" to check if a type extends (or equates) another type - - - "continue" to continue through a loop - - - "break" to end a loop preemptively - - - "label" to mark a point to jump to in the code - - - "goto" to jump back or forward to the label - - - "const" an unchangable value - (used in variable definition) - - - "static" a variable that is kept between block calls, even if it falls out of scope - (used in variable definition) - - - "volatile" a value that may change at any time. - The compiler will not optimize the value even if optimization is enabled. - (used in variable definition) - - - "self" only for use in methods. References the object the method was called on - without the need for pointer differentiation. - - - "super" only for use in methods on structs or interfaces which inherit from other structs/interfaces. - references the parent struct's methods if overridden. - - - "return" stops the current named (or anonymous) code block to give a value to it's caller - - - 1.3.5: Expressions - Expressions represent pieces of code which return values. - These take several forms: - - 1) A literal value - 2) A call to a function or method which returns a value - 3) A reference to a variable - 4) An operator combining two or more of the above - - More in depth: - - 1) Literal values - - Literal numbers start with a decimal number, and can contain up to one non-number element: - 0 - valid - 0.2341 - valid - 120000 - valid - - .34567 - invalid - 0asd...kj - invalid - - Special bases: - - 0b1001 - valid (binary) - 0o0127 - valid (octal) - 0xABCD - valid (hex) - 0BZZZZ - valid (base 64) - - These rules will be ammended and are mostly for ease of parsing in the first version of the language. - - Literal boolean values are put as such: - true - false - - Literal string values use "", and \ as an escape character - "hello, world!" - valid - "\"" - valid - "\\" - valid - - "\" - invalid - " - invalid - - Literal characters use '' and either an escape character or a single character/code point between them - ' ' - valid - '\u2000' - valid - '\\' - valid - '\'' - valid - - invalid: - '\u200220202asdasdaw qwe ' - '\\asd asd ' - 'ab' - '\' - ' - - 2) Call with a return - - calling a function is as simple as naming a block of code and then calling it with parentheses - - # Define - /; add - ;/ - - # Call (not expression) - ;add() - - what makes a call an expression is if it outputs any types - - # Define - /; get_five [int] - ;return 5 - ;/ - - # Call (expression) - ;get_five() - - - 3) A reference to a variable - - Fairly straight-forward, an initialized variable on it's own is a value, and thus, an expression. - - ;int x = 0 - - ;x - - - 4) Combining two or more of the above - - ;x + get_five() - - - 1.3.6: Ways to define variables - - When defining variables, first a variable type must be specified, then a variable name. - Finally, it may be initialized when defined, or not. - - #integer type, uninitialized - ;int x - - #integer type, initialized by literal - ;int y = 42 - - #integer type, initialized by expression - ;int z = y + get_five() - - ;x = z*2 - ----------------------------------- - -1.4: Types - -TNSL's type system consists of built-in types, user interfaces, and user structs. -The built-in types are always on the stack, while user types can be on the stack or the heap. - - 1.4.1: Built-in types - Built-in types are meant to be portable so that tnsl can be backported to whatever system one may want. - - There are four levels: - REQUIRED - If the machine does not have at least this, we will not officially support it - SHOULD - Consumer computer systems have had this for a while, so a system really should have it - LIKELY - Many consumer electronics now have this, so it would be likely to be included - NICE - Future proofing - UNLIKELY - These are just wierd things i thought up in the shower - - Where a given arch falls on this spectrum (as well as market permiation) relates to how likely we are to support it. - It also relates to how standards compliant a device is for our language. - - - Types and their sizes: - - smallest (REQUIRED to be standards compliant): - bool - true or false (smallest addressable space on the system) - - size 8 (REQUIRED to be standards compliant): - achar - represents an ascii value - int8 - -128 to 127 - uint8 - 0 to 255 - - variable (depends on system) (REQUIRED): - generic (void) pointer - "~void" can represent an arbitrary memory address part of heap representation - pointer (for each supported type) - part of heap representation - - size 16 (SHOULD to be standards compliant): - int16 - 16 bit signed int - uint16 - 16 bit unsigned int - - variable (SHOULD): - type - a type which represents other types. - uchar - represents a unicode code point - {}<type> - an array type - - size 32 (SHOULD): - int32 - 32 bit signed int - uint32 - 32 bit unsigned int - float32 - 32 bit floating point (single percision) - - size 64 (LIKELY): - int64 - 64 bit signed int - uint64 - 64 bit unsigned int - float64 - 64 bit floating point - - vect (NICE): - vector, simd, etc. not really sure how these work yet. I'll get back to you - - size 128 (NICE): - int128 - uint128 - float128 - - - 1.4.2: User defined types (stack) - Any structs defined not using pointers are automatically allocated on the stack. - - Structs are normally alligned to byte boundaries - - User defined type ids are always >= 64 - - Defining a struct can be done as follows: - - ;struct <struct name> (<list of inputs (makes this a dynamic type)>) { <list of members (may use inputs)> } - - e.g. - - ;struct Vector2 {int32 x, y} - - - Creating a variable from the struct can be done in two ways: - - # one, use the assignment operator - ;Vector2 vec = Vector2{0, 1} - - # two, use the list syntax - ;Vector2 vec{0, 1} - - # also, feel free to set the members in the list brackets (does not have to be in order) - ;Vector2 vec{x = 0, y = 1} - - # re-assignment must always use the following syntax - ;vec = <expression returning variable type> - - - 1.4.3: User defined methods - Any type may be expanded by user defined methods on that type. - - method block example using operator - - /; method Vector2 - - /; operator + (~Vector2 v) - ;self.x += `v.x - ;self.y += `v.y - ;/ - - /; dot (~Vector2 v) [int32] - ;return self.x * `v.x + self.y * `v.y - ;/ - - ;/ - - said mathods may then be called like so - - /; some_method - ;Vector2 vec1 {0, 2} - ;Vector2 vec2 {1, 4} - - # Call - ;vec1.dot(~vec2) # represents 8 - ;/ - - - 1.4.4: Interfaces - Interfaces exist as a block of mostly un-implimented methods which - can be implimented by other interfaces, but ultimatly, structs. - - Interfaces are types in the sence that objects can be derived from them, - and type equivalance is possible, but there are never any pure instances - of interfaces. - - To define an interface, create a block with some methods. - Methods not marked with "override" are considered implimented by the interface, - and structs do not have to override them. - - /;interface Vector - - /; override length_sq [int32] ;/ - - /; override dimension [int8] ;/ - - ;/ - - Interfaces can be used by the extends keyword, just like structs. - - ; struct Vector2 extends Vector - { - x, y int32 - } - - # Now, not implimenting will throw an error on compile - /; method ~Vector2 - - /; override length_sq [int32] - ;return self.x*self.x + self.y*self.y - ;/ - - /; override dimension [int32] - ;return 2 - ;/ - ;/ - ----------------------------------- - -1.5: Operators - - 1.5.1: List of operators - At the moment, read Appendix A and Appendix B for a list of operators. - - 1.5.2: List of reserved characters - ; : ' " , . < > ~ ` ! # % ^ & * ( ) { } [ ] - = + - ----------------------------------- - -1.6: Borrow checker - -IDK man, maybe later - ----------------------------------- - -1.7: Anonymous blocks - -This chapter covers anonymous code blocks, where they can be used, -and how functions are first-class in TNSL - - 1.7.1: Anonymous - In TNSL, like many other languages, we have closures (or lambda expressions if you perfer). - They have the same type as code blocks (which we havn't really talked about yet), - and can be passed around as variables as well as called. - - The type that functions take on depends on their return value. - - Block variables can be written as - - ;void(inputs)[outputs] block - - i.e. - - ;void(int32)[int32] block - # represents a block which returns a int32 and takes a int32 as a parameter - - Anonymous blocks can be written only as scope, or with inputs and outputs for function calls. - - /; call_func (void(int32)[int32] to_call) [int32] - ;return to_call(5) - ;/ - - /; provide_anon () [int32] - ;return call_func(/; (int32 a) [int32] - ;return a + 1 - ;/) - ;/ - - In fact, all functions are special types of expressions which return themselves (a set of other statements). - ----------------------------------- - -1.8: Raw and Asm - -Sometimes, the programmer needs to impliment an exact or unique set of instructions -as lines of assembly to achieve their goal. Raw and Asm allow for that. - - 1.8.1: raw - The raw keyword tells the compiler to leave the entire block (and its contents) - to the programmer. - - When specifying a raw block, the compiler disables the borrow checker on the block and - everything inside it. It also strips any calling convention from the block if it is a function, - reducing the open and close of the block to a simple call and ret (or equiv). - - If the block is inline, it does even less. Simply carrying forward the instructions passed inside of the block. - - Note that any memory allocated inside the block will not be automatically de-allocated as is normal for variables, - so programmers must take care to make sure there are no memory leaks in the code. Thus, all memoty leaks can be - traced to a raw block. Any and every block may be raw. This includes the main function. - - 1.8.2: asm - The asm keyword may only be used in blocks marked as raw. - - not all the kinks are worked out yet, but the jist is this example: - - ;asm "<string of assembly>" - - you can also use variables in it as long as they are in scope. - - ;asm "mov ax, [some_var]" - - This paired with knowing the calling convention (i.e. where TNSL stores variables before and after a function call) - allows you complete control of the code and execution order if you so wish. - - ----------------------------------- - -Part 2: Related Features - -2.1: Style guide - As a basic rule, users of the language *should* be using the following style guide when - writing TNSL programming. Some of the following definitions are arbitrary, but style guides - are more for consistancy than code quality. - - If working on a large project, a differing style guide may be written to the compliment of the - programmers working on the specific project. - - The style guide is not meant as a way to keep people from programming in TNSL, and is not enforceable, - but should be followed nonetheless for no other reason than consistency in the code base. - - All TNSL specification are **heavily** encouraged to follow the style guide for ease of reading. - - - 2.1.1: Comments - Minimal comments in the code itself unless a particular implementation is fairly obtuse. - - Doc comments for functions should explain what the function is/does, not how it does it. - - Doc comment blocks should start with "/##" and end at the function or method written with "#;" - - e.g. - - /## main is the entry point for the program - - #; main - - ;/ - - - 2.1.2: Variable names - Variable names should be as descriptive as they need. - It is encouraged to make parameters more descriptive so others can see - what they might need to call a function. - - Otherwise single letters, snake_case, and other common cases are just fine - as long as it is fairly clear what is going on. - - constants should be in UPPER_CASE - - 2.1.3: Function and module names - It is recommended to use snake_case - ----------------------------------- - -2.2: The pre-processor - - ----------------------------------- - -Part 3: The TNSL Calling ABI - -Honestly I'm not that versed in assembly, I need to read up >_< - - ----------------------------------- - -Part 4: Standard libs - -4.1: Bare-metal - - -4.2: libts - libts is an effort to create a TNSL standard library and is too large for this document. - Please read related spec text documents. *(discussed more in libts.txt) - - -4.3: Cross Call libc - - ----------------------------------- - -Appendix A: Reserved Characters and their Uses - -( - Starting condition/expression mark open - -) - Starting condition/expression mark close - -[ - Ending condition/expression mark open - -] - Ending condition/expression mark close - -{ - Array/Set mark open - -} - Array/Set mark close - -: - Pre-processor Statement/Directive - -; - Code Statement - -# - Comment Statement - -, - Separates arguments or in-line statements - -= - Assignment operator - -. - Get operator (from struct or module) - -& - Bit-wise and operator - -| - Bit-wise or operator - -^ - Bit-wise xor operator - -> - Greater than boolean operator - -< - Less than boolean operator - -! - Not prefix for boolean expression - -+ - Addition/concat operator - -- - Subtraction operator - -* - Multiplication operator - -/ - Division operator - -% - Modulo operator - -~ - Address of (and define pointer type) operator - -` - Pointer de-reference operator - - ----------------------------------- - -Appendix B: Multi-Character Operators - -/; - Code block mark open - -;/ - Code block mark close - -/: - Pre-processor block mark open - -:/ - Pre-processor block mark close - -/# - Comment block mark open - -#/ - Comment block mark close - -;; - Redefine code block (acts as a shortcut for ;//;) - -:: - Redefine Pre-processor block (acts as shortcut for ://:) - -;# - Switch from code block to comment block (;//#) - -:# - Shortcut (://#) - -#; - Shortcut (#//;) - -#: - Shortcut (#//:) - -== - Boolean equals - -&& - Boolean and - -|| - Boolean or - -<< - Bit-wise l-shift - ->> - Bit-wise r-shift - -++ - Increment - --- - De-Increment - - -Augmented assignment operators (a = a <op> b) = (a <op>= b) -&= - -|= - -^= - -+= - --= - -*= - -/= - -%= - -~= - -`= - - -Augmented boolean operators (a !<op> b) = !(a <op> b) -!& - NAND - -!| - NOR - -!^ - XAND - -!== - Boolean equals - -!&& - -!|| - -!> - -!< - ->== - Same as !< - -<== - Same as !> - ----------------------------------- - -Appendix C: Memory control (and speed) with each type of struct - -Each type of user-definable type or struct or interface grants -it's own level of memory control. These (and their ramifications) are -listed here from low to high. - ---- - -High level, low control structs (dynamic structs) are created when using -the parameters for structs/types. They allow variable length which can -house different information at the cost of speed, memory, and control. - -These are the only type of structs which can house other dynamic structs. -Dynamic structs can only be passes by reference due to undefined size at -compilation. - ---- - -Medium level, medium control structs (type structs) are created normaly -through the struct keyword without parameters. These structs are fixed -length, but the compiler encodes extra info into them. This means they -get method resolution and override checks which may reduce speed of the -application. - ---- - -Low level, high control structs (raw structs) are created using the "raw" -keyword before the "struct" keyword. There are no frills, and method -resolution is not performed for these structs. These structs may not -extend or be extended. They may, however, implement interfaces. They -perform as a "what you see is what you get" kind of memory model. They -may not use parameters, and all internal types must be consistant length -(no dynamic structs or dynamic type identifiers). - ---- - -To summerize: -All these structs can encode the same info, but as you get lower to -the system, you get the bonus of speed and control while losing higher -level functions provided by the language. - -This shouldn't matter much to most programmers unless they are doing -embedded development, systems programming, or firmware programming, -but it is still a consideration to make for time-sensitive applications.
\ No newline at end of file diff --git a/src/tparse/tree-preproc.go b/src/tparse/tree-preproc.go new file mode 100644 index 0000000..d0f6637 --- /dev/null +++ b/src/tparse/tree-preproc.go @@ -0,0 +1,49 @@ +/* + Copyright 2020 Kyle Gunger + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package tparse + +func parsePreBlock (tokens *[]Token, tok, max int) (Node, int) { + out := Node{IsBlock: true} + out.Data = Token{Type: 11, Data: (*tokens)[tok].Data} + + tok++ + + for ; tok < max; tok++ { + t := (*tokens)[tok] + + if t.Data == ":/" { + break + } + + tmp := Node{Data: t, IsBlock: false} + out.Sub = append(out.Sub, tmp) + } + + return out, tok +} + +func parsePre (tokens *[]Token, tok, max int) (Node, int) { + out := Node{IsBlock: false} + out.Data = Token{Type: 11, Data: (*tokens)[tok].Data} + + tok++ + + tmp := Node{Data: (*tokens)[tok], IsBlock: false} + out.Sub = append(out.Sub, tmp) + + return out, tok +}
\ No newline at end of file diff --git a/src/tparse/tree-value.go b/src/tparse/tree-value.go index d03383e..7880a8b 100644 --- a/src/tparse/tree-value.go +++ b/src/tparse/tree-value.go @@ -19,18 +19,20 @@ package tparse // Ops order in TNSL // Cast/Paren > Address > Get > Inc/Dec > Math > Bitwise > Logic -var ORDER = map[string]int{ - // Address of +var UNARY = map[string]int { "~": 0, - // De-ref "`": 0, + "++": 2, + "--": 2, + "!": 6, +} + +var ORDER = map[string]int{ // Get ".": 1, - // Inc/Dec - "++": 2, - "--": 2, + "is": 2, // Multiplication "*": 3, @@ -61,9 +63,6 @@ var ORDER = map[string]int{ "!|": 6, "!^": 6, - // Not (prefix any bool or bitwise) - "!": 6, - // Boolean and "&&": 7, // Boolean or @@ -82,23 +81,133 @@ var ORDER = map[string]int{ "!>": 7, "!<": 7, + + // Assignement + "=": 8, +} + +// Works? Please test. +func parseUnaryOps(tokens *[]Token, tok, max int) (Node) { + out := Node{Data: Token{Type: 10, Data: "value"}, IsBlock: false} + val := false + + // Pre-value op scan + for ; tok < max && !val; tok++ { + t := (*tokens)[tok] + switch t.Type { + case DEFWORD: + fallthrough + case LITERAL: + out.Sub = append(out.Sub, Node{Data: t, IsBlock: false}) + val = true + case AUGMENT: + _, prs := UNARY[t.Data] + if !prs { + errOut("Parser bug! Operator failed to load into AST.", t) + } else { + out.Sub = append(out.Sub, Node{Data: t, IsBlock: false}) + } + default: + errOut("Unexpected token in value declaration", t) + } + } + + // Sanity check: make sure there's actually a value here + if !val { + errOut("Expected to find value, but there wasn't one", (*tokens)[max]) + } + + // Post-value op scan + for ; tok < max; tok++ { + t := (*tokens)[tok] + switch t.Type { + case DELIMIT: + var tmp Node + switch t.Data { + case "(": // Function call + //TODO: parse list of values here + case "[": // Typecasting + tmp, tok = parseType(tokens, tok, max, false) + out.Sub = append(out.Sub, tmp) + case "{": // Array indexing + tmp = Node{Data: Token{Type: 10, Data: "index"}} + var tmp2 Node + tmp2, tok = parseValue(tokens, tok + 1, max) + tmp.Sub = append(tmp.Sub, tmp2) + out.Sub = append(out.Sub, tmp) + default: + errOut("Unexpected delimiter when parsing value", t) + } + case AUGMENT: + _, prs := UNARY[t.Data] + if !prs { + errOut("Parser bug! Operator failed to load into AST.", t) + } else { + out.Sub = append(out.Sub, Node{Data: t, IsBlock: false}) + } + default: + errOut("Unexpected token in value declaration", t) + } + } + + return out +} + +// Works? Please test. +func parseBinaryOp(tokens *[]Token, tok, max int) (Node) { + out := Node{IsBlock: false} + first := tok + var high, highOrder, bincount int = first, 8, 0 + + // Find first high-order op + for ; tok < max; tok++ { + t := (*tokens)[tok] + if t.Type == AUGMENT { + order, prs := ORDER[t.Data] + if !prs { + continue + } else if order > highOrder { + high, highOrder = tok, order + } + // TODO: Add in case for the "is" operator + bincount++ + } + } + + out.Data = (*tokens)[high] + + if bincount == 0 { + // No binops means we have a value to parse. Parse all unary ops around it. + return parseUnaryOps(tokens, first, max) + } else { + // Recursive split to lower order operations + out.Sub = append(out.Sub, parseBinaryOp(tokens, first, high)) + out.Sub = append(out.Sub, parseBinaryOp(tokens, high + 1, max)) + } + + return out } +// TODO: fix this func parseValue(tokens *[]Token, tok, max int) (Node, int) { - out := Node{} + first := tok + for ; tok < max; tok++ { t := (*tokens)[tok] switch t.Type { - case LITERAL: - case DEFWORD: + case LINESEP: + case INLNSEP: case DELIMIT: + case AUGMENT: + case LITERAL: } } - return out, tok + return parseBinaryOp(tokens, first, tok), tok } +// TODO: make sure this actually works func parseVoidType(tokens *[]Token, tok, max int) (Node, int) { out := Node{} working := &out @@ -148,8 +257,9 @@ func parseVoidType(tokens *[]Token, tok, max int) (Node, int) { return out, tok } +// TODO: make sure this actually works func parseType(tokens *[]Token, tok, max int, param bool) (Node, int) { - out := Node{} + out := Node{Data: Token{Type: 10, Data: "type"}} working := &out for ; tok < max; tok++ { diff --git a/src/tparse/tree.go b/src/tparse/tree.go index 08c94bf..af3e184 100644 --- a/src/tparse/tree.go +++ b/src/tparse/tree.go @@ -44,9 +44,9 @@ func MakeTree(tokens *[]Token, file string) Node { case ";": tmp, tok = parseStatement(tokens, tok, max) case "/:": - tmp = Node{} + tmp, tok = parsePreBlock(tokens, tok + 1, max) case ":": - tmp = Node{} + tmp, tok = parsePre(tokens, tok + 1, max) default: errOut("Unexpected token in file root", t) } diff --git a/src/tparse/type.go b/src/tparse/type.go index b860f02..7113c0b 100644 --- a/src/tparse/type.go +++ b/src/tparse/type.go @@ -56,6 +56,7 @@ var PREWORDS = []string{ "else", "abi", //"mark", + "using", } func checkPreWord(s string) int { @@ -92,7 +93,7 @@ var RESWORD = map[string]int{ "struct": KEYWORD, "interface": KEYWORD, "enum": KEYWORD, - "is": KEYWORD, + "is": AUGMENT, "extends": KEYWORD, "loop": KEYWORD, @@ -126,6 +127,8 @@ var RESWORD = map[string]int{ "true": LITERAL, "false": LITERAL, + "alloc": KEYWORD, + "calloc": KEYWORD, "delete": KEYWORD, "module": KEYWORD, |