1
0

documentation

This commit is contained in:
Arpad Ryszka 2026-01-18 22:52:27 +01:00
parent dd6cdb1aac
commit 4c6c817431
39 changed files with 2077 additions and 175 deletions

View File

@ -52,10 +52,12 @@ headexported.go: .build/headexported.go
cp .build/headexported.go . cp .build/headexported.go .
.build/self.go: $(sources) $(parsers) head.go headexported.go .build .build/self.go: $(sources) $(parsers) head.go headexported.go .build
# since generator code depends on the syntax itself, we need to passes: # since the generator code depends on the syntax itself, and such influences its own output, we need two
# passes:
go build -o .build/treerack.current ./cmd/treerack go build -o .build/treerack.current ./cmd/treerack
.build/treerack.current generate --export --package-name self < syntax.treerack > .build/self.go .build/treerack.current generate --export --package-name self < syntax.treerack > .build/self.go
go fmt .build/self.go go fmt .build/self.go
# we backup the original and apply the new:
cp internal/self/self.go .build/self.go.backup cp internal/self/self.go .build/self.go.backup
cp .build/self.go internal/self cp .build/self.go internal/self
# second pass: # second pass:

View File

@ -1,9 +0,0 @@
generator, in-process init or command line
syntax from text or defined during runtime, or combined
simple syntax with recursion
no lexer required
utf8, 8bit or custom tokens
abstract syntax tree from text of arbitrary syntax
reading from streams
context free, however support for custom tokens in the input
custom tokens for indentation built in

View File

@ -2,49 +2,48 @@
Generated with https://code.squareroundforest.org/arpio/docreflect Generated with https://code.squareroundforest.org/arpio/docreflect
*/ */
package main package main
import "code.squareroundforest.org/arpio/docreflect" import "code.squareroundforest.org/arpio/docreflect"
func init() { func init() {
docreflect.Register("main", "") docreflect.Register("main", "")
docreflect.Register("main.check", "check parses input content against the provided syntax definition and fails if the input does not match.\nSyntax can be provided via a filename option or an inline string option. Input can be provided via a filename\noption, a positional argument filename, an inline string option, or piped from standard input.\n\nfunc(o, stdin, args)") docreflect.Register("main.check", "check parses input content against the provided syntax definition and fails if the input does not match.\nSyntax can be provided via a filename option or an inline string option. Input can be provided via a filename\noption, a positional argument filename, an inline string option, or piped from standard input.\n\nfunc(o, stdin, args)")
docreflect.Register("main.checkOptions", "") docreflect.Register("main.checkOptions", "")
docreflect.Register("main.checkOptions.Input", "Input specifies the filename of the input content to be validated.\n") docreflect.Register("main.checkOptions.Input", "Input specifies the filename of the input content to be validated.\n")
docreflect.Register("main.checkOptions.InputString", "InputString specifies the input content as an inline string.\n") docreflect.Register("main.checkOptions.InputString", "InputString specifies the input content as an inline string.\n")
docreflect.Register("main.checkOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n") docreflect.Register("main.checkOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
docreflect.Register("main.checkOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n") docreflect.Register("main.checkOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
docreflect.Register("main.checkSyntax", "checkSyntax validates a syntax definition. The syntax may be provided via a file path (using an option or a\npositional argument), an inline string, or piped from standard input.\n\nfunc(o, stdin, args)") docreflect.Register("main.checkSyntax", "checkSyntax validates a syntax definition. The syntax may be provided via a file path (using an option or a\npositional argument), an inline string, or piped from standard input.\n\nfunc(o, stdin, args)")
docreflect.Register("main.checkSyntaxOptions", "") docreflect.Register("main.checkSyntaxOptions", "")
docreflect.Register("main.checkSyntaxOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n") docreflect.Register("main.checkSyntaxOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
docreflect.Register("main.checkSyntaxOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n") docreflect.Register("main.checkSyntaxOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
docreflect.Register("main.errInvalidFilename", "") docreflect.Register("main.errInvalidFilename", "")
docreflect.Register("main.errMultipleInputs", "") docreflect.Register("main.errMultipleInputs", "")
docreflect.Register("main.errNoInput", "") docreflect.Register("main.errNoInput", "")
docreflect.Register("main.generate", "generate generates Go code that can parse arbitrary input with the provided syntax, and can be used embedded\nin an application.\n\nThe syntax may be provided via a file path (using an option or a positional argument), an\ninline string, or piped from standard input.\n\nfunc(o, stdin, stdout, args)") docreflect.Register("main.generate", "generate generates Go code that can parse arbitrary input with the provided syntax, and can be used embedded\nin an application.\n\nThe syntax may be provided via a file path (using an option or a positional argument), an\ninline string, or piped from standard input.\n\nfunc(o, stdin, stdout, args)")
docreflect.Register("main.generateOptions", "") docreflect.Register("main.generateOptions", "")
docreflect.Register("main.generateOptions.Export", "Export determines whether the generated parse function is exported (visible outside its package).\n") docreflect.Register("main.generateOptions.Export", "Export determines whether the generated parse function is exported (visible outside its package).\n")
docreflect.Register("main.generateOptions.PackageName", "PackageName specifies the package name for the generated code. Defaults to main.\n") docreflect.Register("main.generateOptions.PackageName", "PackageName specifies the package name for the generated code. Defaults to main.\n")
docreflect.Register("main.generateOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n") docreflect.Register("main.generateOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
docreflect.Register("main.generateOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n") docreflect.Register("main.generateOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
docreflect.Register("main.init", "\nfunc()") docreflect.Register("main.init", "\nfunc()")
docreflect.Register("main.initInput", "\nfunc(filename, stringValue, stdin, args)") docreflect.Register("main.initInput", "\nfunc(filename, stringValue, stdin, args)")
docreflect.Register("main.main", "\nfunc()") docreflect.Register("main.main", "\nfunc()")
docreflect.Register("main.mapNode", "\nfunc(n)") docreflect.Register("main.mapNode", "\nfunc(n)")
docreflect.Register("main.node", "") docreflect.Register("main.node", "")
docreflect.Register("main.node.From", "") docreflect.Register("main.node.From", "")
docreflect.Register("main.node.Name", "") docreflect.Register("main.node.Name", "")
docreflect.Register("main.node.Nodes", "") docreflect.Register("main.node.Nodes", "")
docreflect.Register("main.node.Text", "") docreflect.Register("main.node.Text", "")
docreflect.Register("main.node.To", "") docreflect.Register("main.node.To", "")
docreflect.Register("main.noop", "\nfunc()") docreflect.Register("main.noop", "\nfunc()")
docreflect.Register("main.show", "show input content against a provided syntax definition and outputs the resulting AST (Abstract Syntax Tree)\nin JSON format. Syntax can be provided via a filename option or an inline string option. Input can be\nprovided via a filename option, a positional argument filename, an inline string option, or piped from\nstandard input.\n\nfunc(o, stdin, stdout, args)") docreflect.Register("main.show", "show input content against a provided syntax definition and outputs the resulting AST (Abstract Syntax Tree)\nin JSON format. Syntax can be provided via a filename option or an inline string option. Input can be\nprovided via a filename option, a positional argument filename, an inline string option, or piped from\nstandard input.\n\nfunc(o, stdin, stdout, args)")
docreflect.Register("main.showOptions", "") docreflect.Register("main.showOptions", "")
docreflect.Register("main.showOptions.Indent", "Indent specifies a custom indentation string for the output.\n") docreflect.Register("main.showOptions.Indent", "Indent specifies a custom indentation string for the output.\n")
docreflect.Register("main.showOptions.Input", "Input specifies the filename of the input content to be validated.\n") docreflect.Register("main.showOptions.Input", "Input specifies the filename of the input content to be validated.\n")
docreflect.Register("main.showOptions.InputString", "InputString specifies the input content as an inline string.\n") docreflect.Register("main.showOptions.InputString", "InputString specifies the input content as an inline string.\n")
docreflect.Register("main.showOptions.Pretty", "Pretty enables indented, human-readable output.\n") docreflect.Register("main.showOptions.Pretty", "Pretty enables indented, human-readable output.\n")
docreflect.Register("main.showOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n") docreflect.Register("main.showOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
docreflect.Register("main.showOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n") docreflect.Register("main.showOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
docreflect.Register("main.version", "") docreflect.Register("main.version", "")
} }

View File

@ -105,5 +105,9 @@ func show(o showOptions, stdin io.Reader, stdout io.Writer, args ...string) erro
return err return err
} }
if _, err := stdout.Write([]byte{'\n'}); err != nil {
return err
}
return nil return nil
} }

View File

@ -128,7 +128,7 @@ func TestShow(t *testing.T) {
t.Fatal(nil) t.Fatal(nil)
} }
if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` { if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" {
t.Fatal(out.String()) t.Fatal(out.String())
} }
}) })
@ -144,7 +144,7 @@ func TestShow(t *testing.T) {
t.Fatal(nil) t.Fatal(nil)
} }
if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` { if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" {
t.Fatal(out.String()) t.Fatal(out.String())
} }
}) })
@ -159,7 +159,7 @@ func TestShow(t *testing.T) {
t.Fatal(nil) t.Fatal(nil)
} }
if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` { if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" {
t.Fatal(out.String()) t.Fatal(out.String())
} }
}) })
@ -172,7 +172,7 @@ func TestShow(t *testing.T) {
t.Fatal(nil) t.Fatal(nil)
} }
if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` { if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" {
t.Fatal(out.String()) t.Fatal(out.String())
} }
}) })
@ -189,7 +189,7 @@ func TestShow(t *testing.T) {
t.Fatal(nil) t.Fatal(nil)
} }
const expect = "{\n \"name\": \"foo\",\n \"from\": 0,\n \"to\": 3,\n \"text\": \"bar\"\n}" const expect = "{\n \"name\": \"foo\",\n \"from\": 0,\n \"to\": 3,\n \"text\": \"bar\"\n}\n"
if out.String() != expect { if out.String() != expect {
t.Fatal(out.String()) t.Fatal(out.String())
} }
@ -207,7 +207,7 @@ func TestShow(t *testing.T) {
t.Fatal(nil) t.Fatal(nil)
} }
if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}" { if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}\n" {
t.Fatal(out.String()) t.Fatal(out.String())
} }
}) })
@ -225,7 +225,7 @@ func TestShow(t *testing.T) {
t.Fatal(nil) t.Fatal(nil)
} }
if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}" { if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}\n" {
t.Fatal(out.String()) t.Fatal(out.String())
} }
}) })

View File

@ -0,0 +1,47 @@
// first define our whitespace chars:
ignore:ws = " " | [\t] | [\r] | [\n];
// define the format of input numbers. With the :nows flag we declare that we don't expect ignored spaces
// between the digits and the delimiters. We support integers, floating point numbers, and floating point
// numbers with their exponential notation. We don't support arbitrary leading zeros to avoid confusion with the
// octal representation of numbers, which is not supported here.
num:nows = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? [0-9]+)?;
// define the supported operators:
add = "+";
sub = "-";
mul = "*";
div = "/";
// let's define grouping. Any expression can be grouped. The definition of the expression can be found further
// down in the syntax document. This usage of the expression reference is also a good example for recursive
// definitions. Using the :alias flag prevents generating a separate node in the resulting AST.
group:alias = "(" expression ")";
// we group the operators by precedence. This is necessary to parse the expressions like a * b + c in a structure
// that is equivalent to (a * b) + c.
op0:alias = mul | div;
op1:alias = add | sub;
// we also define which operands can be used at which precedence level. Notice, how operand1 also allows binary0
// expressions.
operand0:alias = num | group;
operand1:alias = operand0 | binary0;
// using the prioritized operators, we can define the prioritized binary expressions. We support a + b + c, and
// not only a + b.
binary0 = operand0 (op0 operand0)+;
binary1 = operand1 (op1 operand1)+;
binary:alias = binary0 | binary1;
// let's define, what an expression can be. Notice the recursion along expression and group.
expression:alias = num | group | binary;
// finally, define the root of the parser, the result of the arithmetic expression. It can be any expression,
// but since we used the :alias flag for the expression definition, we need to add a non-alias parser that will
// represent the root of the resulting AST. This also allows us to define an "exit" token, which can be used
// to exit from the REPL loop of our application.
//
// Note that we don't need to use the :root flag here, because it is our last definition, and this means that
// the expression is the root parser of the syntax.
result = expression | "exit"

View File

@ -0,0 +1,3 @@
module acalc
go 1.25.4

143
docs/examples/acalc/main.go Normal file
View File

@ -0,0 +1,143 @@
package main
import (
"bufio"
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"log"
"os"
"strings"
)
var errExit = errors.New("exit")
// repl runs the Read-Eval-Print Loop: it prints a prompt to output, reads one line from input, evaluates it as
// an arithmetic expression, and prints the result to output. It never returns: it terminates the process with
// exit code 0 on EOF or when the exit command is entered, and with a fatal log message on unexpected errors.
func repl(input io.Reader, output io.Writer) {
	// use buffered io, to be able to read the input line-by-line. We wrap the reader received as an argument,
	// and not os.Stdin directly, so that the loop can also be driven from sources other than the stdio:
	buf := bufio.NewReader(input)

	// our REPL loop:
	for {
		// print a basic prompt:
		if _, err := output.Write([]byte("> ")); err != nil {
			// we cannot fix it if there is an error here:
			log.Fatalln(err)
		}

		// read the input and handle the errors:
		expr, err := read(buf)

		// when EOF, that means the user pressed Ctrl+D. Let's terminate the output with a conventional newline
		// and exit:
		if errors.Is(err, io.EOF) {
			// best effort only: we are exiting anyway, so a failed write cannot be handled meaningfully:
			_, _ = output.Write([]byte{'\n'})
			os.Exit(0)
		}

		// when errExit, that means the user entered exit:
		if errors.Is(err, errExit) {
			os.Exit(0)
		}

		// if it's a parser error, we print and continue from reading again, to allow the user to fix the
		// problem (parseError is presumably declared in the generated parser code):
		var perr *parseError
		if errors.As(err, &perr) {
			log.Println(err)
			continue
		}

		// in case of any other error, we don't know what's going on, so we get out of here right away:
		if err != nil {
			log.Fatalln(err)
		}

		// if we received an expression, then we can evaluate it. We are not expecting errors here:
		result := eval(expr)

		// we have the result, we need to print it:
		if err := print(output, result); err != nil {
			// if printing fails, we don't know how to fix it, so we get out of here:
			log.Fatalln(err)
		}
	}
}
// read consumes one newline-terminated line from input and parses it with the generated parser. It returns
// the single child of the root node on success, errExit when the line consists of the exit command, and any
// read or parse error unchanged.
func read(input *bufio.Reader) (*node, error) {
	raw, err := input.ReadString('\n')
	if err != nil {
		return nil, err
	}

	// root will be of type *node, which type is defined in the generated code
	root, err := parse(bytes.NewBufferString(raw))
	switch {
	case err != nil:
		return nil, err
	case strings.TrimSpace(root.Text()) == "exit":
		return nil, errExit
	default:
		// based on the syntax, the top level node always has a single child here, either a number literal
		// or a binary operation:
		return root.Nodes[0], nil
	}
}
// eval computes the value of an expression node and always returns the calculated result as a float64.
//
// The node is expected to be either a "num" node or a binary operation node, whose children alternate between
// operands and operators: operand (operator operand)+. The tree is only read, never modified, so the same
// expression can be evaluated repeatedly.
func eval(expr *node) float64 {
	if expr.Name == "num" {
		// the number format in our syntax is based on the JSON spec, so we can piggy-back on it for the number
		// parsing. In a real application, we would need to handle the errors here anyway, even if our parser
		// already validated the input:
		var value float64
		_ = json.Unmarshal([]byte(expr.Text()), &value)
		return value
	}

	// we know that the first node is either a number or a child expression:
	value := eval(expr.Nodes[0])

	// the rest of the nodes come in operator-operand pairs. We walk them by index instead of re-slicing
	// expr.Nodes, in order to leave the tree of the caller intact:
	for i := 1; i < len(expr.Nodes); i += 2 {
		operator, operand := expr.Nodes[i].Name, eval(expr.Nodes[i+1])
		switch operator {
		case "add":
			value += operand
		case "sub":
			value -= operand
		case "mul":
			value *= operand
		case "div":
			// Go returns -Inf or +Inf on division by zero:
			value /= operand
		}
	}

	return value
}
// print writes the result, followed by a newline, to output, and returns the error of the write, if any.
func print(output io.Writer, result float64) error {
	if _, err := fmt.Fprintln(output, result); err != nil {
		return err
	}

	return nil
}
func main() {
	// the REPL loop lives in its own function, so that it can be called with arbitrary readers and writers.
	// Here, in the entry point of the program, we wire it to the stdio handles:
	stdin, stdout := os.Stdin, os.Stdout
	repl(stdin, stdout)
}

File diff suppressed because one or more lines are too long

629
docs/manual.md Normal file
View File

@ -0,0 +1,629 @@
# Treerack Manual
This manual describes the primary use cases and workflows supported by Treerack.
## Prerequisites
We assume a working installation of the standard Go tooling.
This manual relies on the treerack command-line tool. We can install it using one of the following methods.
**A. source installation (requires make):**
1. clone the repository `git clone https://code.squareroundforest.org/arpio/treerack`
2. navigate to the source directory, run: `make install`. To install it to a custom location, use the `prefix`
environment variable, e.g. run `prefix=~/.local make install`
3. verify the installation: run `treerack version` and `man treerack`
**B. via go install:**
Alternatively, we _may be able to_ install directly using the Go toolchain:
1. run `go install code.squareroundforest.org/arpio/treerack/cmd/treerack@latest`
2. verify: `treerack help`
## Hello syntax
A basic syntax definition looks like this:
```
hello = "Hello, world!"
```
This definition matches only the exact string "Hello, world!" and nothing else. To test the validity of this
rule, run:
```
treerack check-syntax --syntax-string 'hello = "Hello, world!"'
```
If successful, the command exits silently with code 0. (We can append `&& echo ok` to the command to confirm
the successful execution).
To test the syntax against actual input content:
```
treerack check --syntax-string 'hello = "Hello, world!"' --input-string 'Hello, world!'
```
To visualize the resulting Abstract Syntax Tree (AST), use the show subcommand:
```
treerack show --syntax-string 'hello = "Hello, world!"' --input-string 'Hello, world!'
```
The output will be raw JSON:
```
{"name":"hello","from":0,"to":13,"text":"Hello, world!"}
```
For a more readable output, add the --pretty flag:
```
treerack show --pretty --syntax-string 'hello = "Hello, world!"' --input-string 'Hello, world!'
```
...then the output will look like this:
```
{
"name": "hello",
"from": 0,
"to": 13,
"text": "Hello, world!"
}
```
### Handling errors
If our syntax definition is invalid, check-syntax will fail:
```
treerack check-syntax --syntax-string 'foo = bar'
```
The above command will fail because the parser called foo references an undefined parser bar.
We can use check or show to detect when the input content does not match a valid syntax. Using the hello syntax,
we can try the following:
```
treerack check --syntax-string 'hello = "Hello, world!"' --input-string 'Hi!'
```
It will show that parsing the input failed and that it failed while using the parser hello.
## Basic syntax - An arithmetic calculator
In this section, we will build a basic arithmetic calculator. It will read a line from standard input, parse it
as an arithmetic expression, compute the result, and print it—effectively creating a REPL (Read-Eval-Print
Loop).
We will support addition +, subtraction -, multiplication *, division /, and grouping with parentheses ().
acalc.treerack:
```
// Define whitespace characters.
// The :ws flag marks this as the global whitespace handler.
ignore:ws = " " | [\t] | [\r] | [\n];
// Define the number format.
//
// The :nows flag ensures we do not skip whitespace *inside* the number token. We support integers, floats, and
// scientific notation (e.g., 1.5e3). Arbitrary leading zeros are disallowed to prevent confusion with octal
// literals.
num:nows = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? [0-9]+)?;
// define the supported operators:
add = "+";
sub = "-";
mul = "*";
div = "/";
// Grouping logic.
//
// Expressions can be enclosed in parentheses. This references 'expression', which is defined later,
// demonstrating recursive definitions. The :alias flag prevents 'group' from creating its own node in the AST;
// only the child 'expression' will appear.
group:alias = "(" expression ")";
// Operator Precedence.
//
// We group operators by precedence levels to ensure correct order of operations.
//
// Level 0 (High): Multiplication/Division
op0:alias = mul | div;
// Level 1 (Low): Addition/Subtraction
op1:alias = add | sub;
// Operands for each precedence level.
//
// operand0 can be a raw number or a grouped expression.
operand0:alias = num | group;
// operand1 can be a higher-precedence operand or a completed binary0 operation.
operand1:alias = operand0 | binary0;
// Binary Expressions.
//
// We define these hierarchically. 'binary0' handles high-precedence operations (mul/div).
binary0 = operand0 (op0 operand0)+;
binary1 = operand1 (op1 operand1)+;
binary:alias = binary0 | binary1;
// The generalized Expression.
//
// An expression is either a raw number, a group, or a binary operation.
expression:alias = num | group | binary;
// Root Definition.
//
// The final result is either a valid expression or the "exit" command. Since 'expression' is an alias, we need
// a concrete root parser to anchor the AST. Note: The :root flag is optional here because this is the last
// definition in the file.
result = expression | "exit"
```
### Testing the syntax
#### 1. Simple number
```
treerack show --pretty --syntax acalc.treerack --input-string 42
```
Output:
```
{
"name": "result",
"from": 0,
"to": 2,
"nodes": [
{
"name": "num",
"from": 0,
"to": 2,
"text": "42"
}
]
}
```
#### 2. Basic operation
```
treerack show --pretty --syntax acalc.treerack --input-string "42 + 24"
```
Output:
```
{
"name": "result",
"from": 0,
"to": 7,
"nodes": [
{
"name": "binary1",
"from": 0,
"to": 7,
"nodes": [
{
"name": "num",
"from": 0,
"to": 2,
"text": "42"
},
{
"name": "add",
"from": 3,
"to": 4,
"text": "+"
},
{
"name": "num",
"from": 5,
"to": 7,
"text": "24"
}
]
}
]
}
```
#### 3. Precedence check
```
treerack show --pretty --syntax acalc.treerack --input-string "42 + 24 * 2"
```
Output:
```
{
"name": "result",
"from": 0,
"to": 11,
"nodes": [
{
"name": "binary1",
"from": 0,
"to": 11,
"nodes": [
{
"name": "num",
"from": 0,
"to": 2,
"text": "42"
},
{
"name": "add",
"from": 3,
"to": 4,
"text": "+"
},
{
"name": "binary0",
"from": 5,
"to": 11,
"nodes": [
{
"name": "num",
"from": 5,
"to": 7,
"text": "24"
},
{
"name": "mul",
"from": 8,
"to": 9,
"text": "*"
},
{
"name": "num",
"from": 10,
"to": 11,
"text": "2"
}
]
}
]
}
]
}
```
#### 4. Grouping override
```
treerack show --pretty --syntax acalc.treerack --input-string "(42 + 24) * 2"
```
Notice how the 'group' alias node is not present, but now the expression of the addition is a factor in the
multiplication:
```
{
"name": "result",
"from": 0,
"to": 13,
"nodes": [
{
"name": "binary0",
"from": 0,
"to": 13,
"nodes": [
{
"name": "binary1",
"from": 1,
"to": 8,
"nodes": [
{
"name": "num",
"from": 1,
"to": 3,
"text": "42"
},
{
"name": "add",
"from": 4,
"to": 5,
"text": "+"
},
{
"name": "num",
"from": 6,
"to": 8,
"text": "24"
}
]
},
{
"name": "mul",
"from": 10,
"to": 11,
"text": "*"
},
{
"name": "num",
"from": 12,
"to": 13,
"text": "2"
}
]
}
]
}
```
## Generator - Implementing the calculator
We will now generate the Go parser code and integrate it into a CLI application.
Initialize the project:
```
go mod init acalc && go mod tidy
```
Generate the parser:
```
treerack generate --syntax acalc.treerack > parser.go
```
Implement the application logic in main.go.
main.go:
```
package main
import (
"bufio"
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"log"
"os"
"strings"
)
var errExit = errors.New("exit")
// repl runs the Read-Eval-Print Loop.
func repl(input io.Reader, output io.Writer) {
// use buffered io, to be able to read the input line-by-line:
buf := bufio.NewReader(input)
// our REPL loop:
for {
// print a basic prompt:
if _, err := output.Write([]byte("> ")); err != nil {
log.Fatalln(err)
}
// read the input and handle the errors:
expr, err := read(buf)
// Handle EOF (Ctrl+D)
if errors.Is(err, io.EOF) {
output.Write([]byte{'\n'})
os.Exit(0)
}
// Handle explicit exit command
if errors.Is(err, errExit) {
os.Exit(0)
}
// Handle parser errors (allow user to retry)
var perr *parseError
if errors.As(err, &perr) {
log.Println(err)
continue
}
if err != nil {
log.Fatalln(err)
}
// Evaluate and print
result := eval(expr)
if err := print(output, result); err != nil {
log.Fatalln(err)
}
}
}
func read(input *bufio.Reader) (*node, error) {
line, err := input.ReadString('\n')
if err != nil {
return nil, err
}
// Parse the line using the generated parser
expr, err := parse(bytes.NewBufferString(line))
if err != nil {
return nil, err
}
if strings.TrimSpace(expr.Text()) == "exit" {
return nil, errExit
}
// Based on our syntax, the root node always has exactly one child:
// either a number or a binary operation.
return expr.Nodes[0], nil
}
// eval always returns the calculated result as a float64:
func eval(expr *node) float64 {
var value float64
switch expr.Name {
case "num":
// the number format in our syntax is based on the JSON spec, so we can piggy-back on it for the number
// parsing. In a real application, we would need to handle the errors here anyway, even if our parser
// already validated the input:
json.Unmarshal([]byte(expr.Text()), &value)
return value
default:
// Handle binary expressions (recursively)
// Format: Operand [Operator Operand]...
value, expr.Nodes = eval(expr.Nodes[0]), expr.Nodes[1:]
for len(expr.Nodes) > 0 {
var (
operator string
operand float64
)
operator, operand, expr.Nodes = expr.Nodes[0].Name, eval(expr.Nodes[1]), expr.Nodes[2:]
switch operator {
case "add":
value += operand
case "sub":
value -= operand
case "mul":
value *= operand
case "div":
value /= operand // Go handles division by zero as ±Inf
}
}
}
return value
}
func print(output io.Writer, result float64) error {
_, err := fmt.Fprintln(output, result)
return err
}
func main() {
// for testability, we define the REPL loop in a separate function so that the test code can call it with
// in-memory buffers as input and output. Our main function calls it with the stdio handles:
repl(os.Stdin, os.Stdout)
}
```
### Running the calculator
Our arithmetic calculator is now ready. We can run it via `go run .`. An example session may look like this:
```
$ go run .
> (42 + 24) * 2
132
> 42 + 24 * 2
90
> 1 + 2 + 3
6
> exit
```
We can find the source files for this example here: [./examples/acalc](./examples/acalc).
## Important Note: Unescaping
Treerack does not automatically handle escape sequences (e.g., converting \n to a literal newline). If our
syntax supports escaped characters—common in string literals—the user code is responsible for "unescaping" the
raw text from the AST node.
This is analogous to how we needed to parse the numbers in the calculator example to convert the string
representation of a number into a Go float64.
## Programmatically loading syntaxes
While generating static code via treerack generate is the recommended approach, we can also load definitions
dynamically at runtime.
```
package parser
import (
"io"
"code.squareroundforest.org/arpio/treerack"
)
func initAndParse(syntax, content io.Reader) (*treerack.Node, error) {
s := &treerack.Syntax{}
if err := s.ReadSyntax(syntax); err != nil {
return nil, err
}
if err := s.Init(); err != nil {
return nil, err
}
return s.Parse(content)
}
```
Caution: Be mindful of security implications when loading syntax definitions from untrusted sources.
## Programmatically defining syntaxes
In rare cases where a syntax must be constructed computationally, we can define rules via the Go API:
```
package parser
import (
"io"
"code.squareroundforest.org/arpio/treerack"
)
func initAndParse(content io.Reader) (*treerack.Node, error) {
s := &treerack.Syntax{}
// whitespace:
s.Class("whitespace-chars", treerack.Alias, false, []rune{' ', '\t', '\r', '\n'}, nil)
s.Choice("whitespace", treerack.Whitespace, "whitespace-chars")
s.Class("digit", treerack.Alias, false, nil, [][]rune{{'0', '9'}})
s.Sequence("number", treerack.NoWhitespace, treerack.SequenceItem{Name: "digit", Min: 1})
s.Class("operator", treerack.None, false, []rune{'+', '-'}, nil)
s.Sequence(
"expression",
treerack.Root,
treerack.SequenceItem{Name: "number"},
treerack.SequenceItem{Name: "operator"},
treerack.SequenceItem{Name: "number"},
)
if err := s.Init(); err != nil {
return nil, err
}
return s.Parse(content)
}
```
## Summary
We have demonstrated how to use the Treerack tool to define, test, and implement a parser. We recommend the
following workflow:
1. draft: define a syntax in a .treerack file.
2. verify: use `treerack check` and `treerack show` to validate building blocks incrementally.
3. generate: use `treerack generate` to create embeddable Go code.
**Links:**
- the detailed documentation of the treerack definition language: [./syntax.md](./syntax.md)
- treerack command help: [../cmd/treerack/readme.md](../cmd/treerack/readme.md) or, if the command is installed,
`man treerack`, or `path/to/treerack help`
- the arithmetic calculator example: [./examples/acalc](./examples/acalc).
- additional examples: [./examples](./examples)
Happy parsing!

121
docs/syntax.md Normal file
View File

@ -0,0 +1,121 @@
# Treerack Syntax Definition Language
The Treerack library uses a custom grammar description language derived from EBNF (Extended Backus-Naur Form).
It allows for the concise definition of recursive descent parsers.
A syntax file consists of a series of Production Rules (definitions), terminated by semicolons.
## Production Rules
A rule assigns a name to a pattern expression. Rules may include optional flags to modify the parser's behavior
or the resulting AST (Abstract Syntax Tree).
```
RuleName = Expression;
RuleName:flag1:flag2 = Expression;
```
## Flags
Flags are appended to the rule name, separated by colons. They control AST generation, whitespace handling, and
error propagation.
- `alias`: Transparent Node. The rule validates input but does not create its own node in the AST. Child
nodes (if any) are attached to the parent of this rule.
- `ws`: Global Whitespace. Marks this rule as the designated whitespace handler. The parser will attempt to
match (and discard) this rule between tokens throughout the entire syntax.
- `nows`: No Whitespace. Disables automatic whitespace skipping inside this rule. Useful for defining tokens
like string literals where spaces are significant.
- `root`: Entry Point. Explicitly marks the rule as the starting point of the syntax. If omitted, the last
defined rule is implied to be the root.
- `kw`: Keyword. Marks the content as a reserved keyword.
- `nokw`: No Keyword. Prevents the rule from matching text that matches a defined kw rule. Essential for
distinguishing identifiers from keywords (e.g., ensuring var is not parsed as a variable name).
- `failpass`: Pass Failure. If this rule fails to parse, the error is reported as a failure of the parent rule,
not this specific rule.
## Expressions
Expressions define the structure of the text to be parsed. They are composed of terminals, sequences, choices,
and quantifiers.
## Terminals
Terminals match specific characters or strings in the input.
- `"abc"` (string): Matches an exact sequence of characters.
- `.` (any char): Matches any single character (wildcard).
- `[123]`, `[a-z]`, `[123a-z]` (class): Matches a single character from a set or range.
- `[^123]`, `[^a-z]`, `[^123a-z]` (not class): Matches any single character not in the set or range.
## Quantifiers
Quantifiers determine how many times an item must match. They are placed immediately after the item they modify.
- `?`: Optional (Zero or one).
- `*`: Zero or more.
- `+`: One or more.
- `{n}`: Exact count. Matches exactly n times.
- `{n,}`: At least. Matches n or more times.
- `{,m}`: At most. Matches between 0 and m times.
- `{n,m}`: Range. Matches between n and m times.
## Composites
Complex patterns are built by combining terminals and other rules.
### 1. Sequences
Items written consecutively are matched in order.
```
// Matches "A", then "B", then "C"
MySequence = "A" "B" "C";
```
### 2. Grouping
Parentheses (...) group items together, allowing quantifiers to apply to the entire group.
```
// Matches "AB", "ABAB", "ABABAB"...
MyGroup = ("A" "B")+;
```
### 3. Choices
The pipe | character represents a choice between alternatives.
The parser evaluates all provided options against the input at the current position and selects the best match
based on the following priority rules:
1. _Longest Match_: The option that consumes the largest number of characters takes priority. This eliminates the
need to manually order specific matches before general ones (e.g., "integer" will always be chosen over "int" if
the input supports it, regardless of their order in the definition).
2. _First Definition Wins_: If multiple options consume the exact same number of characters, the option defined
first (left-most) in the list takes priority.
```
// Longest match wins automatically:
// Input "integer" is matched by 'type', even though "int" comes first.
type = "int" | "integer";
// Tie-breaker rule:
// If input is "foo", both options match 3 characters.
// Because 'keyword' is listed first, it takes priority over 'identifier'.
// (Use :kw and :nokw to control such situations, when it applies.)
content = keyword | identifier;
```
## Comments
Comments follow C-style syntax and are ignored by the definition parser.
- Line comments: Start with // and end at the newline.
- Block comments: Enclosed in /* ... */.
## Examples
- [JSON](examples/json.treerack)
- [Scheme](examples/scheme.treerack)
- [Treerack (itself)](../syntax.treerack)

View File

@ -61,33 +61,3 @@ func unescapeChar(c rune) rune {
return c return c
} }
} }
func unescape(escape rune, banned, chars []rune) ([]rune, error) {
var (
unescaped []rune
escaped bool
)
for _, ci := range chars {
if escaped {
unescaped = append(unescaped, unescapeChar(ci))
escaped = false
continue
}
switch {
case ci == escape:
escaped = true
case runesContain(banned, ci):
return nil, ErrInvalidEscapeCharacter
default:
unescaped = append(unescaped, ci)
}
}
if escaped {
return nil, ErrInvalidEscapeCharacter
}
return unescaped, nil
}

View File

@ -2,33 +2,8 @@ package treerack
import "testing" import "testing"
func TestUnescape(t *testing.T) {
t.Run("char should be escaped", func(t *testing.T) {
if _, err := unescape('\\', []rune{'a'}, []rune{'a'}); err == nil {
t.Error("failed to fail")
}
})
t.Run("finished with escape char", func(t *testing.T) {
if _, err := unescape('\\', []rune{'a'}, []rune{'b', '\\'}); err == nil {
t.Error("failed to fail")
}
})
t.Run("unescapes", func(t *testing.T) {
u, err := unescape('\\', []rune{'a'}, []rune{'b', '\\', 'a'})
if err != nil {
t.Error(err)
return
}
if string(u) != "ba" {
t.Error("unescape failed")
}
})
}
func TestEscape(t *testing.T) { func TestEscape(t *testing.T) {
t.Skip()
const ( const (
banned = "\b\f\n\r\t\v" banned = "\b\f\n\r\t\v"
unescaped = "\b\f\n\r\t\v" unescaped = "\b\f\n\r\t\v"

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -731,7 +731,8 @@ func (c *context) finalizeParse(root parser) error {
type Node struct { type Node struct {
Name string Name string
Nodes []*Node Nodes []*Node
From, To int From int
To int
tokens []rune tokens []rune
} }

View File

@ -285,7 +285,7 @@ func jsonTreeToJSON(n *Node) (interface{}, error) {
} }
func TestJSON(t *testing.T) { func TestJSON(t *testing.T) {
runTestsFile(t, "examples/json.treerack", []testItem{{ runTestsFile(t, "docs/examples/json.treerack", []testItem{{
title: "true", title: "true",
text: "true", text: "true",
node: &Node{ node: &Node{
@ -509,7 +509,7 @@ func TestRandomJSON(t *testing.T) {
buf := bytes.NewBuffer(b) buf := bytes.NewBuffer(b)
s, err := openSyntaxFile("examples/json.treerack") s, err := openSyntaxFile("docs/examples/json.treerack")
if err != nil { if err != nil {
t.Error(err) t.Error(err)
return return

View File

@ -3,7 +3,7 @@ package treerack
import "testing" import "testing"
func TestKeyVal(t *testing.T) { func TestKeyVal(t *testing.T) {
runTestsFile(t, "examples/keyval.treerack", []testItem{{ runTestsFile(t, "docs/examples/keyval.treerack", []testItem{{
title: "empty", title: "empty",
}, { }, {
title: "a comment", title: "a comment",

View File

@ -4,20 +4,53 @@ import "testing"
func TestKeyword(t *testing.T) { func TestKeyword(t *testing.T) {
const syntax = ` const syntax = `
keywords:kw = "foo" | "bar"; space:ws = " ";
symbol:nokw = [a-z]+; keyword:kw = "foo" | "bar";
symbol:nokw:nows = [a-z]+;
doc:root = (keyword | symbol)*;
` `
runTests(t, syntax, []testItem{{ runTests(t, syntax, []testItem{{
title: "keyword",
text: "foo",
fail: true,
}, {
title: "not keyword", title: "not keyword",
text: "baz", text: "baz",
ignorePosition: true, ignorePosition: true,
node: &Node{ node: &Node{
Name: "doc",
Nodes: []*Node{{
Name: "symbol", Name: "symbol",
}},
},
}, {
title: "keyword",
text: "foo",
ignorePosition: true,
node: &Node{
Name: "doc",
Nodes: []*Node{{
Name: "keyword",
}},
},
}, {
title: "mixed",
text: "foo bar baz bar foo baz bar",
ignorePosition: true,
node: &Node{
Name: "doc",
Nodes: []*Node{{
Name: "keyword",
}, {
Name: "keyword",
}, {
Name: "symbol",
}, {
Name: "keyword",
}, {
Name: "keyword",
}, {
Name: "symbol",
}, {
Name: "keyword",
}},
}, },
}}) }})
} }

View File

@ -3,7 +3,7 @@ package treerack
import "testing" import "testing"
func TestMML(t *testing.T) { func TestMML(t *testing.T) {
s, err := openSyntaxFile("examples/mml.treerack") s, err := openSyntaxFile("docs/examples/mml.treerack")
if err != nil { if err != nil {
t.Error(err) t.Error(err)
return return

View File

@ -5,7 +5,7 @@ import (
) )
func TestMMLExp2(t *testing.T) { func TestMMLExp2(t *testing.T) {
s, err := openSyntaxFile("examples/mml-exp2.treerack") s, err := openSyntaxFile("docs/examples/mml-exp2.treerack")
if err != nil { if err != nil {
t.Error(err) t.Error(err)
return return

View File

@ -5,7 +5,7 @@ import (
) )
func TestMMLExp3(t *testing.T) { func TestMMLExp3(t *testing.T) {
s, err := openSyntaxFile("examples/mml-exp3.treerack") s, err := openSyntaxFile("docs/examples/mml-exp3.treerack")
if err != nil { if err != nil {
t.Error(err) t.Error(err)
return return

View File

@ -9,7 +9,7 @@ import (
) )
func TestMMLExp(t *testing.T) { func TestMMLExp(t *testing.T) {
s, err := openSyntaxFile("examples/mml-exp.treerack") s, err := openSyntaxFile("docs/examples/mml-exp.treerack")
if err != nil { if err != nil {
t.Error(err) t.Error(err)
return return
@ -2987,7 +2987,7 @@ func TestMMLFile(t *testing.T) {
const n = 180 const n = 180
s, err := openSyntaxFile("examples/mml-exp.treerack") s, err := openSyntaxFile("docs/examples/mml-exp.treerack")
if err != nil { if err != nil {
t.Error(err) t.Error(err)
return return
@ -2995,7 +2995,7 @@ func TestMMLFile(t *testing.T) {
s.Init() s.Init()
f, err := os.Open("examples/test.mml") f, err := os.Open("docs/examples/test.mml")
if err != nil { if err != nil {
t.Error(err) t.Error(err)
return return

View File

@ -2,21 +2,39 @@ package treerack
import "fmt" import "fmt"
// Node represents a distinct element in the resulting Abstract Syntax Tree (AST) following a successful parse.
// Every named parser that is not an Alias or Whitespace yields a Node.
type Node struct { type Node struct {
// Name is the identifier of the parser that generated this node.
Name string Name string
// Nodes contains the child nodes representing the substructures of this node.
Nodes []*Node Nodes []*Node
From, To int
// From is the inclusive character offset of the starting position in the input stream.
From int
// To is the exclusive character offset of the ending position in the input stream.
To int
tokens []rune tokens []rune
} }
// Tokens returns the raw rune buffer of the input that this node was parsed from. The node's own segment is
// Tokens()[From:To]; use Text to get that segment as a string.
//
// Note: This returns a reference to the underlying buffer, not a copy. It should not be modified.
func (n *Node) Tokens() []rune { func (n *Node) Tokens() []rune {
return n.tokens return n.tokens
} }
// String returns the string representation of the node, including its name, position range (From/To), and the
// captured text content.
func (n *Node) String() string { func (n *Node) String() string {
return fmt.Sprintf("%s:%d:%d:%s", n.Name, n.From, n.To, n.Text()) return fmt.Sprintf("%s:%d:%d:%s", n.Name, n.From, n.To, n.Text())
} }
// Text returns the actual string segment from the input stream represented by this node.
func (n *Node) Text() string { func (n *Node) Text() string {
return string(n.Tokens()[n.From:n.To]) return string(n.Tokens()[n.From:n.To])
} }

View File

@ -1,9 +1,65 @@
# treerack # treerack
[WIP] A generic parser generator for Go. **A parser generator for Go.**
### Examples Treerack defines and generates recursive descent parsers for arbitrary syntaxes, processing input content into
its Abstract Syntax Tree (AST) representation. It utilizes a custom syntax definition format derived from EBNF
(Extended Backus-Naur Form), allowing for clear and concise grammar descriptions.
- JSON: https://code.squareroundforest.org/arpio/treerack/blob/master/examples/json.treerack ## Examples
- Scheme: https://code.squareroundforest.org/arpio/treerack/blob/master/examples/scheme.treerack
- Treerack (itself): https://code.squareroundforest.org/arpio/treerack/blob/master/syntax.treerack - **JSON**: [docs/examples/json.treerack](docs/examples/json.treerack)
- **Scheme**: [docs/examples/scheme.treerack](docs/examples/scheme.treerack)
- **Treerack (self-definition)**: [syntax.treerack](syntax.treerack)
## Overview
Treerack operates without a separate lexing phase, parsing character streams directly to produce an AST. The
syntax language supports recursive references, enabling the definition of context-free grammars.
We can define syntaxes during development and use the provided tool to generate static Go code, which is then
built into the application. Alternatively, the library supports loading syntaxes dynamically at runtime.
## Installation
From source:
```
git clone https://code.squareroundforest.org/arpio/treerack
cd treerack
make install
```
Alternatively:
```
go install code.squareroundforest.org/arpio/treerack/cmd/treerack@latest
```
## Documentation
- [Manual](docs/manual.md): A guide to the main use cases supported by Treerack.
- [Syntax Definition](docs/syntax.md): Detailed reference for the Treerack definition language.
- [Library Documentation](https://godocs.io/code.squareroundforest.org/arpio/treerack): GoDoc reference for the
runtime library.
## Developer Notes
We use a Makefile to manage the build and verification lifecycle.
Important: Generating the parser for the Treerack syntax itself (bootstrapping) requires multiple phases.
Consequently, running standard go build or go test commands may miss subtle consistency problems.
The authoritative way to verify changes is via the makefile:
```
make check
```
## Limitations
- Lexer & UTF-8: Treerack does not require a lexer, which simplifies the architecture. However, this enforces
the use of UTF-8 input. We have considered support for custom tokenizers as a potential future improvement.
- Whitespace Delimited Languages: Due to the recursive descent nature and the lack of a dedicated lexer state,
defining whitespace-delimited syntaxes (such as Python-style indentation) can be difficult to achieve with the
current feature set.

View File

@ -3,7 +3,7 @@ package treerack
import "testing" import "testing"
func TestScheme(t *testing.T) { func TestScheme(t *testing.T) {
runTestsFile(t, "examples/scheme.treerack", []testItem{{ runTestsFile(t, "docs/examples/scheme.treerack", []testItem{{
title: "empty", title: "empty",
}, { }, {
title: "a function", title: "a function",

View File

@ -140,5 +140,10 @@ func main() {
varName = "headCodeExported" varName = "headCodeExported"
} }
fmt.Printf("package %s\n\n// generated with scripts/createhead.go\nconst %s=%s", packageName, varName, quotedCode) fmt.Printf(
"package %s\n\n// generated with scripts/createhead.go\nconst %s=%s",
packageName,
varName,
quotedCode,
)
} }

View File

@ -3,7 +3,7 @@ package treerack
import "testing" import "testing"
func TestSExpr(t *testing.T) { func TestSExpr(t *testing.T) {
runTestsFile(t, "examples/sexpr.treerack", []testItem{{ runTestsFile(t, "docs/examples/sexpr.treerack", []testItem{{
title: "number", title: "number",
text: "42", text: "42",
nodes: []*Node{{ nodes: []*Node{{

View File

@ -1,3 +1,18 @@
// Package treerack provides a parser generator for defining and interacting with arbitrary syntaxes.
//
// Treerack allows developers to define grammars — programmatically or via a syntax definition language
// derived from EBNF — and generate recursive descent parsers. These parsers process input content and produce
// an Abstract Syntax Tree (AST) representation.
//
// The library supports two primary workflows:
//
// 1. Dynamic (Runtime): Loading or defining syntaxes programmatically at runtime to parse input immediately.
//
// 2. Static (Generation): Defining syntaxes during development and generating Go source code to be compiled
// into the application.
//
// For detailed syntax definition rules and the command-line tool usage, please refer to the repository
// documentation: https://code.squareroundforest.org/arpio/treerack
package treerack package treerack
import ( import (
@ -7,14 +22,37 @@ import (
"io" "io"
) )
// if min=0&&max=0, it means min=1,max=1 // SequenceItem represents a single element within a sequence definition, referencing another parser by name.
// else if max<=0, it means no max //
// else if min<=0, it means no min // Cardinality logic for SequenceItem:
//
// - If Min=0 and Max=0: Matches exactly once (equivalent to Min=1, Max=1).
//
// - If Max <= 0: Unbounded upper limit (matches Min or more times).
//
// - If Min <= 0: No lower limit (matches 0 to Max times).
type SequenceItem struct { type SequenceItem struct {
// Name is the identifier of the referenced parser definition.
Name string Name string
Min, Max int
// Min specifies the minimum required occurrences of the item.
Min int
// Max specifies the maximum accepted occurrences of the item.
Max int
} }
// Syntax represents a complete grammar definition consisting of multiple named parsers.
//
// The lifecycle of a Syntax instance consists of three phases:
//
// 1. Definition: Define parsers using methods like AnyChar, Sequence, and Choice, or load a definition via
// ReadSyntax.
//
// 2. Initialization: Call Init() to validate definitions, resolve references, and seal the syntax.
//
// 3. Execution: Use Parse() to process input or Generate() to create Go source code.
type Syntax struct { type Syntax struct {
registry *registry registry *registry
initialized bool initialized bool
@ -24,8 +62,14 @@ type Syntax struct {
root definition root definition
} }
// GeneratorOptions control the behavior of the Go code generator.
type GeneratorOptions struct { type GeneratorOptions struct {
// PackageName sets the package name for the generated source file. Defaults to main.
PackageName string PackageName string
// Export determines whether the generated Parse function is exported (public) or unexported (private)
// within the package.
Export bool Export bool
} }
@ -51,10 +95,17 @@ type definition interface {
} }
var ( var (
// ErrSyntaxInitialized is returned when attempting to modify a syntax that has already been initialized.
ErrSyntaxInitialized = errors.New("syntax initialized") ErrSyntaxInitialized = errors.New("syntax initialized")
// ErrNoParsersDefined is returned when attempting to initialize a syntax containing no parser definitions.
ErrNoParsersDefined = errors.New("no parsers defined") ErrNoParsersDefined = errors.New("no parsers defined")
ErrInvalidEscapeCharacter = errors.New("invalid escape character")
// ErrMultipleRoots is returned when a syntax definition contains multiple explicit root parsers.
ErrMultipleRoots = errors.New("multiple roots") ErrMultipleRoots = errors.New("multiple roots")
// ErrInvalidSymbolName is returned when a named parser is assigned an invalid identifier.
ErrInvalidSymbolName = errors.New("invalid symbol name") ErrInvalidSymbolName = errors.New("invalid symbol name")
) )
@ -193,6 +244,7 @@ func (s *Syntax) anyChar(name string, ct CommitType) error {
return s.class(name, ct, true, nil, nil) return s.class(name, ct, true, nil, nil)
} }
// AnyChar registers a parser that accepts any single character (a wildcard).
func (s *Syntax) AnyChar(name string, ct CommitType) error { func (s *Syntax) AnyChar(name string, ct CommitType) error {
if !isValidSymbol(name) { if !isValidSymbol(name) {
return ErrInvalidSymbolName return ErrInvalidSymbolName
@ -223,6 +275,8 @@ func (s *Syntax) class(name string, ct CommitType, not bool, chars []rune, range
return s.sequence(name, ct, SequenceItem{Name: cname}) return s.sequence(name, ct, SequenceItem{Name: cname})
} }
// Class registers a character class parser, accepting characters defined in the specific list or ranges. If
// 'not' is true, it matches any character *except* those defined.
func (s *Syntax) Class(name string, ct CommitType, not bool, chars []rune, ranges [][]rune) error { func (s *Syntax) Class(name string, ct CommitType, not bool, chars []rune, ranges [][]rune) error {
if !isValidSymbol(name) { if !isValidSymbol(name) {
return ErrInvalidSymbolName return ErrInvalidSymbolName
@ -244,6 +298,7 @@ func (s *Syntax) charSequence(name string, ct CommitType, chars []rune) error {
return s.sequence(name, ct|NoWhitespace, namesToSequenceItems(refs)...) return s.sequence(name, ct|NoWhitespace, namesToSequenceItems(refs)...)
} }
// CharSequence registers a parser that matches a specific string literal (e.g., "foo").
func (s *Syntax) CharSequence(name string, ct CommitType, chars []rune) error { func (s *Syntax) CharSequence(name string, ct CommitType, chars []rune) error {
if !isValidSymbol(name) { if !isValidSymbol(name) {
return ErrInvalidSymbolName return ErrInvalidSymbolName
@ -256,6 +311,7 @@ func (s *Syntax) sequence(name string, ct CommitType, items ...SequenceItem) err
return s.register(newSequence(name, ct, items)) return s.register(newSequence(name, ct, items))
} }
// Sequence registers a parser that matches a specific order of other named parsers (defined as SequenceItems).
func (s *Syntax) Sequence(name string, ct CommitType, items ...SequenceItem) error { func (s *Syntax) Sequence(name string, ct CommitType, items ...SequenceItem) error {
if !isValidSymbol(name) { if !isValidSymbol(name) {
return ErrInvalidSymbolName return ErrInvalidSymbolName
@ -268,6 +324,7 @@ func (s *Syntax) choice(name string, ct CommitType, options ...string) error {
return s.register(newChoice(name, ct, options)) return s.register(newChoice(name, ct, options))
} }
// Choice registers a parser that matches exactly one of the provided named options.
func (s *Syntax) Choice(name string, ct CommitType, options ...string) error { func (s *Syntax) Choice(name string, ct CommitType, options ...string) error {
if !isValidSymbol(name) { if !isValidSymbol(name) {
return ErrInvalidSymbolName return ErrInvalidSymbolName
@ -276,6 +333,7 @@ func (s *Syntax) Choice(name string, ct CommitType, options ...string) error {
return s.choice(name, ct|userDefined, options...) return s.choice(name, ct|userDefined, options...)
} }
// ReadSyntax loads a grammar definition from a reader using the Treerack syntax format.
func (s *Syntax) ReadSyntax(r io.Reader) error { func (s *Syntax) ReadSyntax(r io.Reader) error {
if s.initialized { if s.initialized {
return ErrSyntaxInitialized return ErrSyntaxInitialized
@ -302,6 +360,8 @@ func (s *Syntax) ReadSyntax(r io.Reader) error {
return define(s, n) return define(s, n)
} }
// Init validates, initializes, and seals the syntax. Parse and Generate call Init themselves, so an explicit
// call is only needed to detect definition errors before parsing or generating.
func (s *Syntax) Init() error { func (s *Syntax) Init() error {
if s.errInitFailed != nil { if s.errInitFailed != nil {
return s.errInitFailed return s.errInitFailed
@ -359,6 +419,7 @@ func (s *Syntax) keywordParsers() []parser {
return p return p
} }
// Generate writes Go source code implementing the parser to the provided writer.
func (s *Syntax) Generate(o GeneratorOptions, w io.Writer) error { func (s *Syntax) Generate(o GeneratorOptions, w io.Writer) error {
if err := s.Init(); err != nil { if err := s.Init(); err != nil {
return err return err
@ -454,6 +515,7 @@ func (s *Syntax) Generate(o GeneratorOptions, w io.Writer) error {
return nil return nil
} }
// Parse reads from the input stream and constructs an AST based on the defined syntax.
func (s *Syntax) Parse(r io.Reader) (*Node, error) { func (s *Syntax) Parse(r io.Reader) (*Node, error) {
if err := s.Init(); err != nil { if err := s.Init(); err != nil {
return nil, err return nil, err

View File

@ -7,16 +7,39 @@ import (
"io" "io"
) )
// CommitType controls how the output of a named parser is handled and represented in the resulting AST.
type CommitType int type CommitType int
const ( const (
// None indicates the default behavior: parsed segments are represented as named nodes in the AST.
// Whitespace handling inherits the syntax's global settings.
None CommitType = 0 None CommitType = 0
// Alias treats the parser as a pass-through. Validated segments are included in the AST node of the
// enclosing parser rather than creating a distinct node.
Alias CommitType = 1 << iota Alias CommitType = 1 << iota
// Whitespace designates a parser (typically a character sequence) to be treated as ignored whitespace
// throughout the input.
Whitespace Whitespace
// NoWhitespace prevents the automatic skipping of defined whitespace characters within specific sequences.
NoWhitespace NoWhitespace
// Keyword marks a sequence as a reserved keyword. This allows specific sequences to be protected or
// restricted in certain contexts via the NoKeyword flag.
Keyword Keyword
// NoKeyword prevents the parser from matching sequences marked as Keywords.
NoKeyword NoKeyword
// FailPass configures the parser to propagate failure up to the enclosing parser rather than handling it
// locally.
FailPass FailPass
// Root explicitly marks the parser as the root of the syntax. By default, the last defined parser is
// considered the root.
Root Root
userDefined userDefined
@ -30,30 +53,24 @@ const (
formatIncludeComments formatIncludeComments
) )
// ParseError is returned when the input text doesn't match // ParseError reports a failure to match the input text against the defined syntax.
// the used syntax during parsing.
type ParseError struct { type ParseError struct {
// Input is the name of the input file or <input> if not // Input denotes the name of the input source (e.g., filename), or "<input>" if unavailable.
// available.
Input string Input string
// Offset is the index of the right-most failing // Offset is the index of the right-most token where the parse failed.
// token in the input text.
Offset int Offset int
// Line tells the line index of the right-most failing // Line is the zero-based line number of the failure position.
// token in the input text.
// //
// It is zero-based, and for error reporting, it is // For display purposes, increment by one.
// recommended to increment it by one.
Line int Line int
// Column tells the column index of the right-most failing // Column is the zero-based column index of the failure position.
// token in the input text.
Column int Column int
// Definition tells the right-most unmatched parser definition. // Definition identifies the name of the specific parser definition where the match failed.
Definition string Definition string
} }
@ -70,8 +87,10 @@ type builder interface {
build(*context) ([]*Node, bool) build(*context) ([]*Node, bool)
} }
// ErrInvalidUnicodeCharacter indicates that the input content contains invalid UTF-8 sequences.
var ErrInvalidUnicodeCharacter = errors.New("invalid unicode character") var ErrInvalidUnicodeCharacter = errors.New("invalid unicode character")
// Error returns the formatted failure message.
func (pe *ParseError) Error() string { func (pe *ParseError) Error() string {
return fmt.Sprintf( return fmt.Sprintf(
"%s:%d:%d:parse failed, parsing: %s", "%s:%d:%d:parse failed, parsing: %s",