documentation
This commit is contained in:
parent
dd6cdb1aac
commit
4c6c817431
4
Makefile
4
Makefile
@ -52,10 +52,12 @@ headexported.go: .build/headexported.go
|
||||
cp .build/headexported.go .
|
||||
|
||||
.build/self.go: $(sources) $(parsers) head.go headexported.go .build
|
||||
# since generator code depends on the syntax itself, we need to passes:
|
||||
# since the generator code depends on the syntax itself, and thus influences its own output, we need two
|
||||
# passes:
|
||||
go build -o .build/treerack.current ./cmd/treerack
|
||||
.build/treerack.current generate --export --package-name self < syntax.treerack > .build/self.go
|
||||
go fmt .build/self.go
|
||||
# we backup the original and apply the new:
|
||||
cp internal/self/self.go .build/self.go.backup
|
||||
cp .build/self.go internal/self
|
||||
# second pass:
|
||||
|
||||
9
buzz.txt
9
buzz.txt
@ -1,9 +0,0 @@
|
||||
generator, in-process init or command line
|
||||
syntax from text or defined during runtime, or combined
|
||||
simple syntax with recursion
|
||||
no lexer required
|
||||
utf8, 8bit or custom tokens
|
||||
abstract syntax tree from text of arbitrary syntax
|
||||
reading from streams
|
||||
context free, however support for custom tokens in the input
|
||||
custom tokens for indentation built in
|
||||
@ -2,49 +2,48 @@
|
||||
Generated with https://code.squareroundforest.org/arpio/docreflect
|
||||
*/
|
||||
|
||||
|
||||
package main
|
||||
|
||||
import "code.squareroundforest.org/arpio/docreflect"
|
||||
|
||||
func init() {
|
||||
docreflect.Register("main", "")
|
||||
docreflect.Register("main.check", "check parses input content against the provided syntax definition and fails if the input does not match.\nSyntax can be provided via a filename option or an inline string option. Input can be provided via a filename\noption, a positional argument filename, an inline string option, or piped from standard input.\n\nfunc(o, stdin, args)")
|
||||
docreflect.Register("main.checkOptions", "")
|
||||
docreflect.Register("main.checkOptions.Input", "Input specifies the filename of the input content to be validated.\n")
|
||||
docreflect.Register("main.checkOptions.InputString", "InputString specifies the input content as an inline string.\n")
|
||||
docreflect.Register("main.checkOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
||||
docreflect.Register("main.checkOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
||||
docreflect.Register("main.checkSyntax", "checkSyntax validates a syntax definition. The syntax may be provided via a file path (using an option or a\npositional argument), an inline string, or piped from standard input.\n\nfunc(o, stdin, args)")
|
||||
docreflect.Register("main.checkSyntaxOptions", "")
|
||||
docreflect.Register("main.checkSyntaxOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
||||
docreflect.Register("main.checkSyntaxOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
||||
docreflect.Register("main.errInvalidFilename", "")
|
||||
docreflect.Register("main.errMultipleInputs", "")
|
||||
docreflect.Register("main.errNoInput", "")
|
||||
docreflect.Register("main.generate", "generate generates Go code that can parse arbitrary input with the provided syntax, and can be used embedded\nin an application.\n\nThe syntax may be provided via a file path (using an option or a positional argument), an\ninline string, or piped from standard input.\n\nfunc(o, stdin, stdout, args)")
|
||||
docreflect.Register("main.generateOptions", "")
|
||||
docreflect.Register("main.generateOptions.Export", "Export determines whether the generated parse function is exported (visible outside its package).\n")
|
||||
docreflect.Register("main.generateOptions.PackageName", "PackageName specifies the package name for the generated code. Defaults to main.\n")
|
||||
docreflect.Register("main.generateOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
||||
docreflect.Register("main.generateOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
||||
docreflect.Register("main.init", "\nfunc()")
|
||||
docreflect.Register("main.initInput", "\nfunc(filename, stringValue, stdin, args)")
|
||||
docreflect.Register("main.main", "\nfunc()")
|
||||
docreflect.Register("main.mapNode", "\nfunc(n)")
|
||||
docreflect.Register("main.node", "")
|
||||
docreflect.Register("main.node.From", "")
|
||||
docreflect.Register("main.node.Name", "")
|
||||
docreflect.Register("main.node.Nodes", "")
|
||||
docreflect.Register("main.node.Text", "")
|
||||
docreflect.Register("main.node.To", "")
|
||||
docreflect.Register("main.noop", "\nfunc()")
|
||||
docreflect.Register("main.show", "show input content against a provided syntax definition and outputs the resulting AST (Abstract Syntax Tree)\nin JSON format. Syntax can be provided via a filename option or an inline string option. Input can be\nprovided via a filename option, a positional argument filename, an inline string option, or piped from\nstandard input.\n\nfunc(o, stdin, stdout, args)")
|
||||
docreflect.Register("main.showOptions", "")
|
||||
docreflect.Register("main.showOptions.Indent", "Indent specifies a custom indentation string for the output.\n")
|
||||
docreflect.Register("main.showOptions.Input", "Input specifies the filename of the input content to be validated.\n")
|
||||
docreflect.Register("main.showOptions.InputString", "InputString specifies the input content as an inline string.\n")
|
||||
docreflect.Register("main.showOptions.Pretty", "Pretty enables indented, human-readable output.\n")
|
||||
docreflect.Register("main.showOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
||||
docreflect.Register("main.showOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
||||
docreflect.Register("main.version", "")
|
||||
}
|
||||
docreflect.Register("main", "")
|
||||
docreflect.Register("main.check", "check parses input content against the provided syntax definition and fails if the input does not match.\nSyntax can be provided via a filename option or an inline string option. Input can be provided via a filename\noption, a positional argument filename, an inline string option, or piped from standard input.\n\nfunc(o, stdin, args)")
|
||||
docreflect.Register("main.checkOptions", "")
|
||||
docreflect.Register("main.checkOptions.Input", "Input specifies the filename of the input content to be validated.\n")
|
||||
docreflect.Register("main.checkOptions.InputString", "InputString specifies the input content as an inline string.\n")
|
||||
docreflect.Register("main.checkOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
||||
docreflect.Register("main.checkOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
||||
docreflect.Register("main.checkSyntax", "checkSyntax validates a syntax definition. The syntax may be provided via a file path (using an option or a\npositional argument), an inline string, or piped from standard input.\n\nfunc(o, stdin, args)")
|
||||
docreflect.Register("main.checkSyntaxOptions", "")
|
||||
docreflect.Register("main.checkSyntaxOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
||||
docreflect.Register("main.checkSyntaxOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
||||
docreflect.Register("main.errInvalidFilename", "")
|
||||
docreflect.Register("main.errMultipleInputs", "")
|
||||
docreflect.Register("main.errNoInput", "")
|
||||
docreflect.Register("main.generate", "generate generates Go code that can parse arbitrary input with the provided syntax, and can be used embedded\nin an application.\n\nThe syntax may be provided via a file path (using an option or a positional argument), an\ninline string, or piped from standard input.\n\nfunc(o, stdin, stdout, args)")
|
||||
docreflect.Register("main.generateOptions", "")
|
||||
docreflect.Register("main.generateOptions.Export", "Export determines whether the generated parse function is exported (visible outside its package).\n")
|
||||
docreflect.Register("main.generateOptions.PackageName", "PackageName specifies the package name for the generated code. Defaults to main.\n")
|
||||
docreflect.Register("main.generateOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
||||
docreflect.Register("main.generateOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
||||
docreflect.Register("main.init", "\nfunc()")
|
||||
docreflect.Register("main.initInput", "\nfunc(filename, stringValue, stdin, args)")
|
||||
docreflect.Register("main.main", "\nfunc()")
|
||||
docreflect.Register("main.mapNode", "\nfunc(n)")
|
||||
docreflect.Register("main.node", "")
|
||||
docreflect.Register("main.node.From", "")
|
||||
docreflect.Register("main.node.Name", "")
|
||||
docreflect.Register("main.node.Nodes", "")
|
||||
docreflect.Register("main.node.Text", "")
|
||||
docreflect.Register("main.node.To", "")
|
||||
docreflect.Register("main.noop", "\nfunc()")
|
||||
docreflect.Register("main.show", "show input content against a provided syntax definition and outputs the resulting AST (Abstract Syntax Tree)\nin JSON format. Syntax can be provided via a filename option or an inline string option. Input can be\nprovided via a filename option, a positional argument filename, an inline string option, or piped from\nstandard input.\n\nfunc(o, stdin, stdout, args)")
|
||||
docreflect.Register("main.showOptions", "")
|
||||
docreflect.Register("main.showOptions.Indent", "Indent specifies a custom indentation string for the output.\n")
|
||||
docreflect.Register("main.showOptions.Input", "Input specifies the filename of the input content to be validated.\n")
|
||||
docreflect.Register("main.showOptions.InputString", "InputString specifies the input content as an inline string.\n")
|
||||
docreflect.Register("main.showOptions.Pretty", "Pretty enables indented, human-readable output.\n")
|
||||
docreflect.Register("main.showOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
||||
docreflect.Register("main.showOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
||||
docreflect.Register("main.version", "")
|
||||
}
|
||||
@ -105,5 +105,9 @@ func show(o showOptions, stdin io.Reader, stdout io.Writer, args ...string) erro
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := stdout.Write([]byte{'\n'}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -128,7 +128,7 @@ func TestShow(t *testing.T) {
|
||||
t.Fatal(nil)
|
||||
}
|
||||
|
||||
if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` {
|
||||
if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" {
|
||||
t.Fatal(out.String())
|
||||
}
|
||||
})
|
||||
@ -144,7 +144,7 @@ func TestShow(t *testing.T) {
|
||||
t.Fatal(nil)
|
||||
}
|
||||
|
||||
if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` {
|
||||
if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" {
|
||||
t.Fatal(out.String())
|
||||
}
|
||||
})
|
||||
@ -159,7 +159,7 @@ func TestShow(t *testing.T) {
|
||||
t.Fatal(nil)
|
||||
}
|
||||
|
||||
if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` {
|
||||
if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" {
|
||||
t.Fatal(out.String())
|
||||
}
|
||||
})
|
||||
@ -172,7 +172,7 @@ func TestShow(t *testing.T) {
|
||||
t.Fatal(nil)
|
||||
}
|
||||
|
||||
if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` {
|
||||
if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" {
|
||||
t.Fatal(out.String())
|
||||
}
|
||||
})
|
||||
@ -189,7 +189,7 @@ func TestShow(t *testing.T) {
|
||||
t.Fatal(nil)
|
||||
}
|
||||
|
||||
const expect = "{\n \"name\": \"foo\",\n \"from\": 0,\n \"to\": 3,\n \"text\": \"bar\"\n}"
|
||||
const expect = "{\n \"name\": \"foo\",\n \"from\": 0,\n \"to\": 3,\n \"text\": \"bar\"\n}\n"
|
||||
if out.String() != expect {
|
||||
t.Fatal(out.String())
|
||||
}
|
||||
@ -207,7 +207,7 @@ func TestShow(t *testing.T) {
|
||||
t.Fatal(nil)
|
||||
}
|
||||
|
||||
if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}" {
|
||||
if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}\n" {
|
||||
t.Fatal(out.String())
|
||||
}
|
||||
})
|
||||
@ -225,7 +225,7 @@ func TestShow(t *testing.T) {
|
||||
t.Fatal(nil)
|
||||
}
|
||||
|
||||
if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}" {
|
||||
if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}\n" {
|
||||
t.Fatal(out.String())
|
||||
}
|
||||
})
|
||||
|
||||
47
docs/examples/acalc/acalc.treerack
Normal file
47
docs/examples/acalc/acalc.treerack
Normal file
@ -0,0 +1,47 @@
|
||||
// first define our whitespace chars:
|
||||
ignore:ws = " " | [\t] | [\r] | [\n];
|
||||
|
||||
// define the format of input numbers. With the :nows flag we declare that we don't expect ignored spaces
|
||||
// between the digits and the delimiters. We support integers, floating point numbers, and floating point
|
||||
// numbers with their exponential notation. We don't support arbitrary leading zeros to avoid confusion with the
|
||||
// octal representation of numbers, which is not supported here.
|
||||
num:nows = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? [0-9]+)?;
|
||||
|
||||
// define the supported operators:
|
||||
add = "+";
|
||||
sub = "-";
|
||||
mul = "*";
|
||||
div = "/";
|
||||
|
||||
// let's define grouping. Any expression can be grouped. The definition of the expression can be found further
|
||||
// down in the syntax document. This usage of the expression reference is also a good example for recursive
|
||||
// definitions. Using the :alias flag prevents generating a separate node in the resulting AST.
|
||||
group:alias = "(" expression ")";
|
||||
|
||||
// we group the operators by precedence. This is necessary to parse the expressions like a * b + c in a structure
|
||||
// that is equivalent to (a * b) + c.
|
||||
op0:alias = mul | div;
|
||||
op1:alias = add | sub;
|
||||
|
||||
// we also define which operands can be used at which precedence level. Notice, how operand1 also allows binary0
|
||||
// expressions.
|
||||
operand0:alias = num | group;
|
||||
operand1:alias = operand0 | binary0;
|
||||
|
||||
// using the prioritized operators, we can define the prioritized binary expressions. We support a + b + c, and
|
||||
// not only a + b.
|
||||
binary0 = operand0 (op0 operand0)+;
|
||||
binary1 = operand1 (op1 operand1)+;
|
||||
binary:alias = binary0 | binary1;
|
||||
|
||||
// let's define, what an expression can be. Notice the recursion along expression and group.
|
||||
expression:alias = num | group | binary;
|
||||
|
||||
// finally, define the root of the parser, the result of the arithmetic expression. It can be any expression,
|
||||
// but since we used the :alias flag for the expression definition, we need to add a non-alias parser that will
|
||||
// represent the root of the resulting AST. This also allows us to define an "exit" token, which can be used
|
||||
// to exit from the REPL loop of our application.
|
||||
//
|
||||
// Note that we don't need to use the :root flag here, because it is our last definition, and this means that
|
||||
// the expression is the root parser of the syntax.
|
||||
result = expression | "exit"
|
||||
3
docs/examples/acalc/go.mod
Normal file
3
docs/examples/acalc/go.mod
Normal file
@ -0,0 +1,3 @@
|
||||
module acalc
|
||||
|
||||
go 1.25.4
|
||||
143
docs/examples/acalc/main.go
Normal file
143
docs/examples/acalc/main.go
Normal file
@ -0,0 +1,143 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var errExit = errors.New("exit")
|
||||
|
||||
func repl(input io.Reader, output io.Writer) {
|
||||
// use buffered io, to be able to read the input line-by-line:
|
||||
buf := bufio.NewReader(os.Stdin)
|
||||
|
||||
// our REPL loop:
|
||||
for {
|
||||
// print a basic prompt:
|
||||
if _, err := output.Write([]byte("> ")); err != nil {
|
||||
|
||||
// we cannot fix it if there is an error here:
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
// read the input and handle the errors:
|
||||
expr, err := read(buf)
|
||||
|
||||
// when EOF, that means the user pressed Ctrl+D. Let's terminate the output with a conventional newline
|
||||
// and exit:
|
||||
if errors.Is(err, io.EOF) {
|
||||
output.Write([]byte{'\n'})
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// when errExit, that means the user entered exit:
|
||||
if errors.Is(err, errExit) {
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// if it's a parser error, we print and continue from reading again, to allow the user to fix the
|
||||
// problem:
|
||||
var perr *parseError
|
||||
if errors.As(err, &perr) {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
|
||||
// in case of any other error, we don't know what's going on, so we get out of here right away:
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
// if we received an expression, then we can evaluate it. We are not expecting errors here:
|
||||
result := eval(expr)
|
||||
|
||||
// we have the result, we need to print it:
|
||||
if err := print(output, result); err != nil {
|
||||
|
||||
// if printing fails, we don't know how to fix it, so we get out of here:
|
||||
log.Fatalln(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func read(input *bufio.Reader) (*node, error) {
|
||||
line, err := input.ReadString('\n')
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// expr will be of type *node, which type is defined in the generated code
|
||||
expr, err := parse(bytes.NewBufferString(line))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if strings.TrimSpace(expr.Text()) == "exit" {
|
||||
return nil, errExit
|
||||
}
|
||||
|
||||
// we know based on the syntax, that the top level node will always have a single child, either a number
|
||||
// literal or a binary operation:
|
||||
return expr.Nodes[0], nil
|
||||
}
|
||||
|
||||
// eval always returns the calculated result as a float64:
|
||||
func eval(expr *node) float64 {
|
||||
|
||||
// we know that it's either a number or a binary operation:
|
||||
var value float64
|
||||
switch expr.Name {
|
||||
case "num":
|
||||
|
||||
// the number format in our syntax is based on the JSON spec, so we can piggy-back on it for the number
|
||||
// parsing. In a real application, we would need to handle the errors here anyway, even if our parser
|
||||
// already validated the input:
|
||||
json.Unmarshal([]byte(expr.Text()), &value)
|
||||
return value
|
||||
default:
|
||||
|
||||
// we know that the first node is either a number of a child expression:
|
||||
value, expr.Nodes = eval(expr.Nodes[0]), expr.Nodes[1:]
|
||||
|
||||
// we don't need to track back, so we can drop the processed nodes while consuming them:
|
||||
for len(expr.Nodes) > 0 {
|
||||
var (
|
||||
operator string
|
||||
operand float64
|
||||
)
|
||||
|
||||
operator, operand, expr.Nodes = expr.Nodes[0].Name, eval(expr.Nodes[1]), expr.Nodes[2:]
|
||||
switch operator {
|
||||
case "add":
|
||||
value += operand
|
||||
case "sub":
|
||||
value -= operand
|
||||
case "mul":
|
||||
value *= operand
|
||||
case "div":
|
||||
// Go returns -Inf or +Inf on division by zero:
|
||||
value /= operand
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return value
|
||||
}
|
||||
|
||||
// print writes the result to output in its default format, followed by a newline. It returns the error of
// the underlying write, if any.
func print(output io.Writer, result float64) error {
	if _, err := fmt.Fprintln(output, result); err != nil {
		return err
	}

	return nil
}
|
||||
|
||||
func main() {
|
||||
// for testability, we define the REPL loop in a separate function so that the test code can call it with
|
||||
// in-memory buffers as input and output. Our main function calls it with the stdio handles:
|
||||
repl(os.Stdin, os.Stdout)
|
||||
}
|
||||
824
docs/examples/acalc/parser.go
Normal file
824
docs/examples/acalc/parser.go
Normal file
File diff suppressed because one or more lines are too long
629
docs/manual.md
Normal file
629
docs/manual.md
Normal file
@ -0,0 +1,629 @@
|
||||
# Treerack Manual
|
||||
|
||||
This manual describes the primary use cases and workflows supported by Treerack.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
We assume a working installation of the standard Go tooling.
|
||||
|
||||
This manual relies on the treerack command-line tool. We can install it using one of the following methods.
|
||||
|
||||
**A. source installation (requires make):**
|
||||
|
||||
1. clone the repository `git clone https://code.squareroundforest.org/arpio/treerack`
|
||||
2. navigate to the source directory, run: `make install`. To install it to a custom location, use the `prefix`
|
||||
environment variable, e.g. run `prefix=~/.local make install`
|
||||
3. verify the installation: run `treerack version` and `man treerack`
|
||||
|
||||
**B. via go install:**
|
||||
|
||||
Alternatively, we _may be able to_ install directly using the Go toolchain:
|
||||
|
||||
1. run `go install code.squareroundforest.org/arpio/treerack/cmd/treerack`
|
||||
2. verify: `treerack help`
|
||||
|
||||
## Hello syntax
|
||||
|
||||
A basic syntax definition looks like this:
|
||||
|
||||
```
|
||||
hello = "Hello, world!"
|
||||
```
|
||||
|
||||
This definition matches only the exact string "Hello, world!" and nothing else. To test the validity of this
|
||||
rule, run:
|
||||
|
||||
```
|
||||
treerack check-syntax --syntax-string 'hello = "Hello, world!"'
|
||||
```
|
||||
|
||||
If successful, the command exits silently with code 0. (We can append && echo ok to advertise successful
|
||||
execution).
|
||||
|
||||
To test the syntax against actual input content:
|
||||
|
||||
```
|
||||
treerack check --syntax-string 'hello = "Hello, world!"' --input-string 'Hello, world!'
|
||||
```
|
||||
|
||||
To visualize the resulting Abstract Syntax Tree (AST), use the show subcommand:
|
||||
|
||||
```
|
||||
treerack show --syntax-string 'hello = "Hello, world!"' --input-string 'Hello, world!'
|
||||
```
|
||||
|
||||
The output will be raw JSON:
|
||||
|
||||
```
|
||||
{"name":"hello","from":0,"to":13,"text":"Hello, world!"}
|
||||
```
|
||||
|
||||
For a more readable output, add the --pretty flag:
|
||||
|
||||
```
|
||||
treerack show --pretty --syntax-string 'hello = "Hello, world!"' --input-string 'Hello, world!'
|
||||
```
|
||||
|
||||
...then the output will look like this:
|
||||
|
||||
```
|
||||
{
|
||||
"name": "hello",
|
||||
"from": 0,
|
||||
"to": 13,
|
||||
"text": "Hello, world!"
|
||||
}
|
||||
```
|
||||
|
||||
### Handling errors
|
||||
|
||||
If our syntax definition is invalid, check-syntax will fail:
|
||||
|
||||
```
|
||||
treerack check-syntax --syntax-string 'foo = bar'
|
||||
```
|
||||
|
||||
The above command will fail because the parser called foo references an undefined parser bar.
|
||||
|
||||
We can use check or show to detect when the input content does not match a valid syntax. Using the hello syntax,
|
||||
we can try the following:
|
||||
|
||||
```
|
||||
treerack check --syntax-string 'hello = "Hello, world!"' --input-string 'Hi!'
|
||||
```
|
||||
|
||||
It will show that parsing the input failed and that it failed while using the parser hello.
|
||||
|
||||
## Basic syntax - An arithmetic calculator
|
||||
|
||||
In this section, we will build a basic arithmetic calculator. It will read a line from standard input, parse it
|
||||
as an arithmetic expression, compute the result, and print it—effectively creating a REPL (Read-Eval-Print
|
||||
Loop).
|
||||
|
||||
We will support addition +, subtraction -, multiplication *, division /, and grouping with parentheses ().
|
||||
|
||||
acalc.treerack:
|
||||
|
||||
```
|
||||
// Define whitespace characters.
|
||||
// The :ws flag marks this as the global whitespace handler.
|
||||
ignore:ws = " " | [\t] | [\r] | [\n];
|
||||
|
||||
// Define the number format.
|
||||
//
|
||||
// The :nows flag ensures we do not skip whitespace *inside* the number token. We support integers, floats, and
|
||||
// scientific notation (e.g., 1.5e3). Arbitrary leading zeros are disallowed to prevent confusion with octal
|
||||
// literals.
|
||||
num:nows = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? [0-9]+)?;
|
||||
|
||||
// define the supported operators:
|
||||
add = "+";
|
||||
sub = "-";
|
||||
mul = "*";
|
||||
div = "/";
|
||||
|
||||
// Grouping logic.
|
||||
//
|
||||
// Expressions can be enclosed in parentheses. This references 'expression', which is defined later,
|
||||
// demonstrating recursive definitions. The :alias flag prevents 'group' from creating its own node in the AST;
|
||||
// only the child 'expression' will appear.
|
||||
group:alias = "(" expression ")";
|
||||
|
||||
// Operator Precedence.
|
||||
//
|
||||
// We group operators by precedence levels to ensure correct order of operations.
|
||||
//
|
||||
// Level 0 (High): Multiplication/Division
|
||||
op0:alias = mul | div;
|
||||
|
||||
// Level 1 (Low): Addition/Subtraction
|
||||
op1:alias = add | sub;
|
||||
|
||||
// Operands for each precedence level.
|
||||
//
|
||||
// operand0 can be a raw number or a grouped expression.
|
||||
operand0:alias = num | group;
|
||||
|
||||
// operand1 can be a higher-precedence operand or a completed binary0 operation.
|
||||
operand1:alias = operand0 | binary0;
|
||||
|
||||
// Binary Expressions.
|
||||
//
|
||||
// We define these hierarchically. 'binary0' handles high-precedence operations (mul/div).
|
||||
binary0 = operand0 (op0 operand0)+;
|
||||
binary1 = operand1 (op1 operand1)+;
|
||||
binary:alias = binary0 | binary1;
|
||||
|
||||
// The generalized Expression.
|
||||
//
|
||||
// An expression is either a raw number, a group, or a binary operation.
|
||||
expression:alias = num | group | binary;
|
||||
|
||||
// Root Definition.
|
||||
//
|
||||
// The final result is either a valid expression or the "exit" command. Since 'expression' is an alias, we need
|
||||
// a concrete root parser to anchor the AST. Note: The :root flag is optional here because this is the last
|
||||
// definition in the file.
|
||||
result = expression | "exit"
|
||||
```
|
||||
|
||||
### Testing the syntax
|
||||
|
||||
#### 1. Simple number
|
||||
|
||||
```
|
||||
treerack show --pretty --syntax acalc.treerack --input-string 42
|
||||
```
|
||||
|
||||
Output:
|
||||
|
||||
```
|
||||
{
|
||||
"name": "result",
|
||||
"from": 0,
|
||||
"to": 2,
|
||||
"nodes": [
|
||||
{
|
||||
"name": "num",
|
||||
"from": 0,
|
||||
"to": 2,
|
||||
"text": "42"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### 2. Basic operation
|
||||
|
||||
```
|
||||
treerack show --pretty --syntax acalc.treerack --input-string "42 + 24"
|
||||
```
|
||||
|
||||
Output:
|
||||
|
||||
```
|
||||
{
|
||||
"name": "result",
|
||||
"from": 0,
|
||||
"to": 7,
|
||||
"nodes": [
|
||||
{
|
||||
"name": "binary1",
|
||||
"from": 0,
|
||||
"to": 7,
|
||||
"nodes": [
|
||||
{
|
||||
"name": "num",
|
||||
"from": 0,
|
||||
"to": 2,
|
||||
"text": "42"
|
||||
},
|
||||
{
|
||||
"name": "add",
|
||||
"from": 3,
|
||||
"to": 4,
|
||||
"text": "+"
|
||||
},
|
||||
{
|
||||
"name": "num",
|
||||
"from": 5,
|
||||
"to": 7,
|
||||
"text": "24"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### 3. Precedence check
|
||||
|
||||
```
|
||||
treerack show --pretty --syntax acalc.treerack --input-string "42 + 24 * 2"
|
||||
```
|
||||
|
||||
Output:
|
||||
|
||||
```
|
||||
{
|
||||
"name": "result",
|
||||
"from": 0,
|
||||
"to": 11,
|
||||
"nodes": [
|
||||
{
|
||||
"name": "binary1",
|
||||
"from": 0,
|
||||
"to": 11,
|
||||
"nodes": [
|
||||
{
|
||||
"name": "num",
|
||||
"from": 0,
|
||||
"to": 2,
|
||||
"text": "42"
|
||||
},
|
||||
{
|
||||
"name": "add",
|
||||
"from": 3,
|
||||
"to": 4,
|
||||
"text": "+"
|
||||
},
|
||||
{
|
||||
"name": "binary0",
|
||||
"from": 5,
|
||||
"to": 11,
|
||||
"nodes": [
|
||||
{
|
||||
"name": "num",
|
||||
"from": 5,
|
||||
"to": 7,
|
||||
"text": "24"
|
||||
},
|
||||
{
|
||||
"name": "mul",
|
||||
"from": 8,
|
||||
"to": 9,
|
||||
"text": "*"
|
||||
},
|
||||
{
|
||||
"name": "num",
|
||||
"from": 10,
|
||||
"to": 11,
|
||||
"text": "2"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
#### 4. Grouping override
|
||||
|
||||
```
|
||||
treerack show --pretty --syntax acalc.treerack --input-string "(42 + 24) * 2"
|
||||
```
|
||||
|
||||
Notice how the 'group' alias node is not present, but now the expression of the addition is a factor in the
|
||||
multiplication:
|
||||
|
||||
```
|
||||
{
|
||||
"name": "result",
|
||||
"from": 0,
|
||||
"to": 13,
|
||||
"nodes": [
|
||||
{
|
||||
"name": "binary0",
|
||||
"from": 0,
|
||||
"to": 13,
|
||||
"nodes": [
|
||||
{
|
||||
"name": "binary1",
|
||||
"from": 1,
|
||||
"to": 8,
|
||||
"nodes": [
|
||||
{
|
||||
"name": "num",
|
||||
"from": 1,
|
||||
"to": 3,
|
||||
"text": "42"
|
||||
},
|
||||
{
|
||||
"name": "add",
|
||||
"from": 4,
|
||||
"to": 5,
|
||||
"text": "+"
|
||||
},
|
||||
{
|
||||
"name": "num",
|
||||
"from": 6,
|
||||
"to": 8,
|
||||
"text": "24"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "mul",
|
||||
"from": 10,
|
||||
"to": 11,
|
||||
"text": "*"
|
||||
},
|
||||
{
|
||||
"name": "num",
|
||||
"from": 12,
|
||||
"to": 13,
|
||||
"text": "2"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Generator - Implementing the calculator
|
||||
|
||||
We will now generate the Go parser code and integrate it into a CLI application.
|
||||
|
||||
Initialize the project:
|
||||
|
||||
```
|
||||
go mod init acalc && go mod tidy
|
||||
```
|
||||
|
||||
Generate the parser:
|
||||
|
||||
```
|
||||
treerack generate --syntax acalc.treerack > parser.go
|
||||
```
|
||||
|
||||
Implement the application logic in main.go.
|
||||
|
||||
main.go:
|
||||
|
||||
```
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var errExit = errors.New("exit")
|
||||
|
||||
// repl runs the Read-Eval-Print Loop.
|
||||
func repl(input io.Reader, output io.Writer) {
|
||||
|
||||
// use buffered io, to be able to read the input line-by-line:
|
||||
buf := bufio.NewReader(os.Stdin)
|
||||
|
||||
// our REPL loop:
|
||||
for {
|
||||
// print a basic prompt:
|
||||
if _, err := output.Write([]byte("> ")); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
// read the input and handle the errors:
|
||||
expr, err := read(buf)
|
||||
|
||||
// Handle EOF (Ctrl+D)
|
||||
if errors.Is(err, io.EOF) {
|
||||
output.Write([]byte{'\n'})
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// Handle explicit exit command
|
||||
if errors.Is(err, errExit) {
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// Handle parser errors (allow user to retry)
|
||||
var perr *parseError
|
||||
if errors.As(err, &perr) {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
// Evaluate and print
|
||||
result := eval(expr)
|
||||
if err := print(output, result); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func read(input *bufio.Reader) (*node, error) {
|
||||
line, err := input.ReadString('\n')
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Parse the line using the generated parser
|
||||
expr, err := parse(bytes.NewBufferString(line))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if strings.TrimSpace(expr.Text()) == "exit" {
|
||||
return nil, errExit
|
||||
}
|
||||
|
||||
// Based on our syntax, the root node always has exactly one child:
|
||||
// either a number or a binary operation.
|
||||
return expr.Nodes[0], nil
|
||||
}
|
||||
|
||||
// eval always returns the calculated result as a float64:
|
||||
func eval(expr *node) float64 {
|
||||
var value float64
|
||||
switch expr.Name {
|
||||
case "num":
|
||||
|
||||
// the number format in our syntax is based on the JSON spec, so we can piggy-back on it for the number
|
||||
// parsing. In a real application, we would need to handle the errors here anyway, even if our parser
|
||||
// already validated the input:
|
||||
json.Unmarshal([]byte(expr.Text()), &value)
|
||||
return value
|
||||
default:
|
||||
|
||||
// Handle binary expressions (recursively)
|
||||
// Format: Operand [Operator Operand]...
|
||||
value, expr.Nodes = eval(expr.Nodes[0]), expr.Nodes[1:]
|
||||
for len(expr.Nodes) > 0 {
|
||||
var (
|
||||
operator string
|
||||
operand float64
|
||||
)
|
||||
|
||||
operator, operand, expr.Nodes = expr.Nodes[0].Name, eval(expr.Nodes[1]), expr.Nodes[2:]
|
||||
switch operator {
|
||||
case "add":
|
||||
value += operand
|
||||
case "sub":
|
||||
value -= operand
|
||||
case "mul":
|
||||
value *= operand
|
||||
case "div":
|
||||
value /= operand // Go handles division by zero as ±Inf
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return value
|
||||
}
|
||||
|
||||
func print(output io.Writer, result float64) error {
|
||||
_, err := fmt.Fprintln(output, result)
|
||||
return err
|
||||
}
|
||||
|
||||
func main() {
|
||||
// for testability, we define the REPL loop in a separate function so that the test code can call it with
|
||||
// in-memory buffers as input and output. Our main function calls it with the stdio handles:
|
||||
repl(os.Stdin, os.Stdout)
|
||||
}
|
||||
```
|
||||
|
||||
### Running the calculator
|
||||
|
||||
Our arithmetic calculator is now ready. We can run it via `go run .`. An example session may look like this:
|
||||
|
||||
```
|
||||
$ go run .
|
||||
> (42 + 24) * 2
|
||||
132
|
||||
> 42 + 24 * 2
|
||||
90
|
||||
> 1 + 2 + 3
|
||||
6
|
||||
> exit
|
||||
```
|
||||
|
||||
We can find the source files for this example here: [./examples/acalc](./examples/acalc).
|
||||
|
||||
## Important Note: Unescaping
|
||||
|
||||
Treerack does not automatically handle escape sequences (e.g., converting \n to a literal newline). If our
|
||||
syntax supports escaped characters—common in string literals—the user code is responsible for "unescaping" the
|
||||
raw text from the AST node.
|
||||
|
||||
This is analogous to how we needed to parse the numbers in the calculator example to convert the string
|
||||
representation of a number into a Go float64.
|
||||
|
||||
## Programmatically loading syntaxes
|
||||
|
||||
While generating static code via `treerack generate` is the recommended approach, we can also load definitions
|
||||
dynamically at runtime.
|
||||
|
||||
```
|
||||
package parser
|
||||
|
||||
import (
|
||||
"io"
|
||||
"code.squareroundforest.org/arpio/treerack"
|
||||
)
|
||||
|
||||
func initAndParse(syntax, content io.Reader) (*treerack.Node, error) {
|
||||
s := &treerack.Syntax{}
|
||||
if err := s.ReadSyntax(syntax); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := s.Init(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return s.Parse(content)
|
||||
}
|
||||
```
|
||||
|
||||
Caution: Be mindful of security implications when loading syntax definitions from untrusted sources.
|
||||
|
||||
## Programmatically defining syntaxes
|
||||
|
||||
In rare cases where a syntax must be constructed computationally, we can define rules via the Go API:
|
||||
|
||||
```
|
||||
package parser
|
||||
|
||||
import (
|
||||
"io"
|
||||
"code.squareroundforest.org/arpio/treerack"
|
||||
)
|
||||
|
||||
func initAndParse(content io.Reader) (*treerack.Node, error) {
|
||||
s := &treerack.Syntax{}
|
||||
|
||||
// whitespace:
|
||||
s.Class("whitespace-chars", treerack.Alias, false, []rune{' ', '\t', '\r', '\n'}, nil)
|
||||
s.Choice("whitespace", treerack.Whitespace, "whitespace-chars")
|
||||
|
||||
s.Class("digit", treerack.Alias, false, nil, [][]rune{{'0', '9'}})
|
||||
s.Sequence("number", treerack.NoWhitespace, treerack.SequenceItem{Name: "digit", Min: 1})
|
||||
s.Class("operator", treerack.None, false, []rune{'+', '-'}, nil)
|
||||
s.Sequence(
|
||||
"expression",
|
||||
treerack.Root,
|
||||
treerack.SequenceItem{Name: "number"},
|
||||
treerack.SequenceItem{Name: "operator"},
|
||||
treerack.SequenceItem{Name: "number"},
|
||||
)
|
||||
|
||||
if err := s.Init(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return s.Parse(content)
|
||||
}
|
||||
```
|
||||
|
||||
## Summary
|
||||
|
||||
We have demonstrated how to use the Treerack tool to define, test, and implement a parser. We recommend the
|
||||
following workflow:
|
||||
|
||||
1. draft: define a syntax in a .treerack file.
|
||||
2. verify: use `treerack check` and `treerack show` to validate building blocks incrementally.
|
||||
3. generate: use `treerack generate` to create embeddable Go code.
|
||||
|
||||
**Links:**
|
||||
|
||||
- the detailed documentation of the treerack definition language: [./syntax.md](./syntax.md)
|
||||
- treerack command help: [../cmd/treerack/readme.md](../cmd/treerack/readme.md) or, if the command is installed,
|
||||
`man treerack`, or `path/to/treerack help`
|
||||
- the arithmetic calculator example: [./examples/acalc](./examples/acalc).
|
||||
- additional examples: [./examples](./examples)
|
||||
|
||||
Happy parsing!
|
||||
121
docs/syntax.md
Normal file
121
docs/syntax.md
Normal file
@ -0,0 +1,121 @@
|
||||
# Treerack Syntax Definition Language
|
||||
|
||||
The Treerack library uses a custom grammar description language derived from EBNF (Extended Backus-Naur Form).
|
||||
It allows for the concise definition of recursive descent parsers.
|
||||
|
||||
A syntax file consists of a series of Production Rules (definitions), terminated by semicolons.
|
||||
|
||||
## Production Rules
|
||||
|
||||
A rule assigns a name to a pattern expression. Rules may include optional flags to modify the parser's behavior
|
||||
or the resulting AST (Abstract Syntax Tree).
|
||||
|
||||
```
|
||||
RuleName = Expression;
|
||||
RuleName:flag1:flag2 = Expression;
|
||||
```
|
||||
|
||||
## Flags
|
||||
|
||||
Flags are appended to the rule name, separated by colons. They control AST generation, whitespace handling, and
|
||||
error propagation.
|
||||
|
||||
- `alias`: Transparent Node. The rule validates input but does not create its own node in the AST. Children
|
||||
nodes (if any) are attached to the parent of this rule.
|
||||
- `ws`: Global Whitespace. Marks this rule as the designated whitespace handler. The parser will attempt to
|
||||
match (and discard) this rule between tokens throughout the entire syntax.
|
||||
- `nows`: No Whitespace. Disables automatic whitespace skipping inside this rule. Useful for defining tokens
|
||||
like string literals where spaces are significant.
|
||||
- `root`: Entry Point. Explicitly marks the rule as the starting point of the syntax. If omitted, the last
|
||||
defined rule is implied to be the root.
|
||||
- `kw`: Keyword. Marks the content as a reserved keyword.
|
||||
- `nokw`: No Keyword. Prevents the rule from matching text that matches a defined kw rule. Essential for
|
||||
distinguishing identifiers from keywords (e.g., ensuring var is not parsed as a variable name).
|
||||
- `failpass`: Pass Failure. If this rule fails to parse, the error is reported as a failure of the parent rule,
|
||||
not this specific rule.
|
||||
|
||||
## Expressions
|
||||
|
||||
Expressions define the structure of the text to be parsed. They are composed of terminals, sequences, choices,
|
||||
and quantifiers.
|
||||
|
||||
## Terminals
|
||||
|
||||
Terminals match specific characters or strings in the input.
|
||||
|
||||
- `"abc"` (string): Matches an exact sequence of characters.
|
||||
- `.` (any char): Matches any single character (wildcard).
|
||||
- `[123]`, `[a-z]`, `[123a-z]` (class): Matches a single character from a set or range.
|
||||
- `[^123]`, `[^a-z]`, `[^123a-z]` (not class): Matches any single character not in the set.
|
||||
|
||||
## Quantifiers
|
||||
|
||||
Quantifiers determine how many times an item must match. They are placed immediately after the item they modify.
|
||||
|
||||
- `?`: Optional (Zero or one).
|
||||
- `*`: Zero or more.
|
||||
- `+`: One or more.
|
||||
- `{n}`: Exact count. Matches exactly n times.
|
||||
- `{n,}`: At least. Matches n or more times.
|
||||
- `{,m}`: At most. Matches between 0 and m times.
|
||||
- `{n,m}`: Range. Matches between n and m times.
|
||||
|
||||
## Composites
|
||||
|
||||
Complex patterns are built by combining terminals and other rules.
|
||||
|
||||
### 1. Sequences
|
||||
|
||||
Items written consecutively are matched in order.
|
||||
|
||||
```
|
||||
// Matches "A", then "B", then "C"
|
||||
MySequence = "A" "B" "C";
|
||||
```
|
||||
|
||||
### 2. Grouping
|
||||
|
||||
Parentheses (...) group items together, allowing quantifiers to apply to the entire group.
|
||||
|
||||
```
|
||||
// Matches "AB", "ABAB", "ABABAB"...
|
||||
MyGroup = ("A" "B")+;
|
||||
```
|
||||
|
||||
### 3. Choices
|
||||
|
||||
The pipe | character represents a choice between alternatives.
|
||||
|
||||
The parser evaluates all provided options against the input at the current position and selects the best match
|
||||
based on the following priority rules:
|
||||
|
||||
1. _Longest Match_: The option that consumes the largest number of characters takes priority. This eliminates the
|
||||
need to manually order specific matches before general ones (e.g., "integer" will always be chosen over "int" if
|
||||
the input supports it, regardless of their order in the definition).
|
||||
2. _First Definition Wins_: If multiple options consume the exact same number of characters, the option defined
|
||||
first (left-most) in the list takes priority.
|
||||
|
||||
```
|
||||
// Longest match wins automatically:
|
||||
// Input "integer" is matched by 'type', even though "int" comes first.
|
||||
type = "int" | "integer";
|
||||
|
||||
// Tie-breaker rule:
|
||||
// If input is "foo", both options match 3 characters.
|
||||
// Because 'keyword' is defined first, it takes priority over 'identifier'.
|
||||
// (Use :kw and :nokw to control such situations, when it applies.)
|
||||
content = keyword | identifier;
|
||||
```
|
||||
|
||||
## Comments
|
||||
|
||||
Comments follow C-style syntax and are ignored by the definition parser.
|
||||
|
||||
- Line comments: Start with // and end at the newline.
|
||||
- Block comments: Enclosed in /* ... */.
|
||||
|
||||
## Examples
|
||||
|
||||
- [JSON](examples/json.treerack)
|
||||
- [Scheme](examples/scheme.treerack)
|
||||
- [Treerack (itself)](../syntax.treerack)
|
||||
30
escape.go
30
escape.go
@ -61,33 +61,3 @@ func unescapeChar(c rune) rune {
|
||||
return c
|
||||
}
|
||||
}
|
||||
|
||||
func unescape(escape rune, banned, chars []rune) ([]rune, error) {
|
||||
var (
|
||||
unescaped []rune
|
||||
escaped bool
|
||||
)
|
||||
|
||||
for _, ci := range chars {
|
||||
if escaped {
|
||||
unescaped = append(unescaped, unescapeChar(ci))
|
||||
escaped = false
|
||||
continue
|
||||
}
|
||||
|
||||
switch {
|
||||
case ci == escape:
|
||||
escaped = true
|
||||
case runesContain(banned, ci):
|
||||
return nil, ErrInvalidEscapeCharacter
|
||||
default:
|
||||
unescaped = append(unescaped, ci)
|
||||
}
|
||||
}
|
||||
|
||||
if escaped {
|
||||
return nil, ErrInvalidEscapeCharacter
|
||||
}
|
||||
|
||||
return unescaped, nil
|
||||
}
|
||||
|
||||
@ -2,33 +2,8 @@ package treerack
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestUnescape(t *testing.T) {
|
||||
t.Run("char should be escaped", func(t *testing.T) {
|
||||
if _, err := unescape('\\', []rune{'a'}, []rune{'a'}); err == nil {
|
||||
t.Error("failed to fail")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("finished with escape char", func(t *testing.T) {
|
||||
if _, err := unescape('\\', []rune{'a'}, []rune{'b', '\\'}); err == nil {
|
||||
t.Error("failed to fail")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("unescapes", func(t *testing.T) {
|
||||
u, err := unescape('\\', []rune{'a'}, []rune{'b', '\\', 'a'})
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
if string(u) != "ba" {
|
||||
t.Error("unescape failed")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestEscape(t *testing.T) {
|
||||
t.Skip()
|
||||
const (
|
||||
banned = "\b\f\n\r\t\v"
|
||||
unescaped = "\b\f\n\r\t\v"
|
||||
|
||||
File diff suppressed because one or more lines are too long
@ -729,10 +729,11 @@ func (c *context) finalizeParse(root parser) error {
|
||||
}
|
||||
|
||||
type Node struct {
|
||||
Name string
|
||||
Nodes []*Node
|
||||
From, To int
|
||||
tokens []rune
|
||||
Name string
|
||||
Nodes []*Node
|
||||
From int
|
||||
To int
|
||||
tokens []rune
|
||||
}
|
||||
|
||||
func (n *Node) Tokens() []rune {
|
||||
|
||||
@ -285,7 +285,7 @@ func jsonTreeToJSON(n *Node) (interface{}, error) {
|
||||
}
|
||||
|
||||
func TestJSON(t *testing.T) {
|
||||
runTestsFile(t, "examples/json.treerack", []testItem{{
|
||||
runTestsFile(t, "docs/examples/json.treerack", []testItem{{
|
||||
title: "true",
|
||||
text: "true",
|
||||
node: &Node{
|
||||
@ -509,7 +509,7 @@ func TestRandomJSON(t *testing.T) {
|
||||
|
||||
buf := bytes.NewBuffer(b)
|
||||
|
||||
s, err := openSyntaxFile("examples/json.treerack")
|
||||
s, err := openSyntaxFile("docs/examples/json.treerack")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
|
||||
@ -3,7 +3,7 @@ package treerack
|
||||
import "testing"
|
||||
|
||||
func TestKeyVal(t *testing.T) {
|
||||
runTestsFile(t, "examples/keyval.treerack", []testItem{{
|
||||
runTestsFile(t, "docs/examples/keyval.treerack", []testItem{{
|
||||
title: "empty",
|
||||
}, {
|
||||
title: "a comment",
|
||||
|
||||
@ -4,20 +4,53 @@ import "testing"
|
||||
|
||||
func TestKeyword(t *testing.T) {
|
||||
const syntax = `
|
||||
keywords:kw = "foo" | "bar";
|
||||
symbol:nokw = [a-z]+;
|
||||
space:ws = " ";
|
||||
keyword:kw = "foo" | "bar";
|
||||
symbol:nokw:nows = [a-z]+;
|
||||
doc:root = (keyword | symbol)*;
|
||||
`
|
||||
|
||||
runTests(t, syntax, []testItem{{
|
||||
title: "keyword",
|
||||
text: "foo",
|
||||
fail: true,
|
||||
}, {
|
||||
title: "not keyword",
|
||||
text: "baz",
|
||||
ignorePosition: true,
|
||||
node: &Node{
|
||||
Name: "symbol",
|
||||
Name: "doc",
|
||||
Nodes: []*Node{{
|
||||
Name: "symbol",
|
||||
}},
|
||||
},
|
||||
}, {
|
||||
title: "keyword",
|
||||
text: "foo",
|
||||
ignorePosition: true,
|
||||
node: &Node{
|
||||
Name: "doc",
|
||||
Nodes: []*Node{{
|
||||
Name: "keyword",
|
||||
}},
|
||||
},
|
||||
}, {
|
||||
title: "mixed",
|
||||
text: "foo bar baz bar foo baz bar",
|
||||
ignorePosition: true,
|
||||
node: &Node{
|
||||
Name: "doc",
|
||||
Nodes: []*Node{{
|
||||
Name: "keyword",
|
||||
}, {
|
||||
Name: "keyword",
|
||||
}, {
|
||||
Name: "symbol",
|
||||
}, {
|
||||
Name: "keyword",
|
||||
}, {
|
||||
Name: "keyword",
|
||||
}, {
|
||||
Name: "symbol",
|
||||
}, {
|
||||
Name: "keyword",
|
||||
}},
|
||||
},
|
||||
}})
|
||||
}
|
||||
|
||||
@ -3,7 +3,7 @@ package treerack
|
||||
import "testing"
|
||||
|
||||
func TestMML(t *testing.T) {
|
||||
s, err := openSyntaxFile("examples/mml.treerack")
|
||||
s, err := openSyntaxFile("docs/examples/mml.treerack")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
|
||||
@ -5,7 +5,7 @@ import (
|
||||
)
|
||||
|
||||
func TestMMLExp2(t *testing.T) {
|
||||
s, err := openSyntaxFile("examples/mml-exp2.treerack")
|
||||
s, err := openSyntaxFile("docs/examples/mml-exp2.treerack")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
|
||||
@ -5,7 +5,7 @@ import (
|
||||
)
|
||||
|
||||
func TestMMLExp3(t *testing.T) {
|
||||
s, err := openSyntaxFile("examples/mml-exp3.treerack")
|
||||
s, err := openSyntaxFile("docs/examples/mml-exp3.treerack")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
|
||||
@ -9,7 +9,7 @@ import (
|
||||
)
|
||||
|
||||
func TestMMLExp(t *testing.T) {
|
||||
s, err := openSyntaxFile("examples/mml-exp.treerack")
|
||||
s, err := openSyntaxFile("docs/examples/mml-exp.treerack")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
@ -2987,7 +2987,7 @@ func TestMMLFile(t *testing.T) {
|
||||
|
||||
const n = 180
|
||||
|
||||
s, err := openSyntaxFile("examples/mml-exp.treerack")
|
||||
s, err := openSyntaxFile("docs/examples/mml-exp.treerack")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
@ -2995,7 +2995,7 @@ func TestMMLFile(t *testing.T) {
|
||||
|
||||
s.Init()
|
||||
|
||||
f, err := os.Open("examples/test.mml")
|
||||
f, err := os.Open("docs/examples/test.mml")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
|
||||
26
nodehead.go
26
nodehead.go
@ -2,21 +2,39 @@ package treerack
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Node represents a distinct element in the resulting Abstract Syntax Tree (AST) following a successful parse.
|
||||
// Every named parser that is not an Alias or Whitespace yields a Node.
|
||||
type Node struct {
|
||||
Name string
|
||||
Nodes []*Node
|
||||
From, To int
|
||||
tokens []rune
|
||||
|
||||
// Name is the identifier of the parser that generated this node.
|
||||
Name string
|
||||
|
||||
// Nodes contains the child nodes representing the substructures of this node.
|
||||
Nodes []*Node
|
||||
|
||||
// From is the inclusive character offset of the starting position in the input stream.
|
||||
From int
|
||||
|
||||
// To is the exclusive character offset of the ending position in the input stream.
|
||||
To int
|
||||
|
||||
tokens []rune
|
||||
}
|
||||
|
||||
// Tokens returns the raw slice of runes from the input stream represented by this node.
|
||||
//
|
||||
// Note: This returns a reference to the underlying buffer, not a copy. It should not be modified.
|
||||
func (n *Node) Tokens() []rune {
|
||||
return n.tokens
|
||||
}
|
||||
|
||||
// String returns the string representation of the node, including its name, position range (From/To), and the
|
||||
// captured text content.
|
||||
func (n *Node) String() string {
|
||||
return fmt.Sprintf("%s:%d:%d:%s", n.Name, n.From, n.To, n.Text())
|
||||
}
|
||||
|
||||
// Text returns the actual string segment from the input stream represented by this node.
|
||||
func (n *Node) Text() string {
|
||||
return string(n.Tokens()[n.From:n.To])
|
||||
}
|
||||
|
||||
66
readme.md
66
readme.md
@ -1,9 +1,65 @@
|
||||
# treerack
|
||||
|
||||
[WIP] A generic parser generator for Go.
|
||||
**A parser generator for Go.**
|
||||
|
||||
### Examples
|
||||
Treerack defines and generates recursive descent parsers for arbitrary syntaxes, processing input content into
|
||||
its Abstract Syntax Tree (AST) representation. It utilizes a custom syntax definition format derived from EBNF
|
||||
(Extended Backus-Naur Form), allowing for clear and concise grammar descriptions.
|
||||
|
||||
- JSON: https://code.squareroundforest.org/arpio/treerack/blob/master/examples/json.treerack
|
||||
- Scheme: https://code.squareroundforest.org/arpio/treerack/blob/master/examples/scheme.treerack
|
||||
- Treerack (itself): https://code.squareroundforest.org/arpio/treerack/blob/master/syntax.treerack
|
||||
## Examples
|
||||
|
||||
- **JSON**: [docs/examples/json.treerack](docs/examples/json.treerack)
|
||||
- **Scheme**: [docs/examples/scheme.treerack](docs/examples/scheme.treerack)
|
||||
- **Treerack (self-definition)**: [syntax.treerack](syntax.treerack)
|
||||
|
||||
## Overview
|
||||
|
||||
Treerack operates without a separate lexing phase, parsing character streams directly to produce an AST. The
|
||||
syntax language supports recursive references, enabling the definition of context-free grammars.
|
||||
|
||||
We can define syntaxes during development and use the provided tool to generate static Go code, which is then
|
||||
built into the application. Alternatively, the library supports loading syntaxes dynamically at runtime.
|
||||
|
||||
## Installation
|
||||
|
||||
From source:
|
||||
|
||||
```
|
||||
git clone https://code.squareroundforest.org/arpio/treerack
|
||||
cd treerack
|
||||
make install
|
||||
```
|
||||
|
||||
Alternatively:
|
||||
|
||||
```
|
||||
go install code.squareroundforest.org/arpio/treerack/cmd/treerack@latest
|
||||
```
|
||||
|
||||
## Documentation
|
||||
|
||||
- [Manual](docs/manual.md): A guide to the main use cases supported by Treerack.
|
||||
- [Syntax Definition](docs/syntax.md): Detailed reference for the Treerack definition language.
|
||||
- [Library Documentation](https://godocs.io/code.squareroundforest.org/arpio/treerack): GoDoc reference for the
|
||||
runtime library.
|
||||
|
||||
## Developer Notes
|
||||
|
||||
We use a Makefile to manage the build and verification lifecycle.
|
||||
|
||||
Important: Generating the parser for the Treerack syntax itself (bootstrapping) requires multiple phases.
|
||||
Consequently, running standard go build or go test commands may miss subtle consistency problems.
|
||||
|
||||
The authoritative way to verify changes is via the makefile:
|
||||
|
||||
```
|
||||
make check
|
||||
```
|
||||
|
||||
## Limitations
|
||||
|
||||
- Lexer & UTF-8: Treerack does not require a lexer, which simplifies the architecture. However, this enforces
|
||||
the use of UTF-8 input. We have considered support for custom tokenizers as a potential future improvement.
|
||||
- Whitespace Delimited Languages: Due to the recursive descent nature and the lack of a dedicated lexer state,
|
||||
defining whitespace-delimited syntaxes (such as Python-style indentation) can be difficult to achieve with the
|
||||
current feature set.
|
||||
|
||||
@ -3,7 +3,7 @@ package treerack
|
||||
import "testing"
|
||||
|
||||
func TestScheme(t *testing.T) {
|
||||
runTestsFile(t, "examples/scheme.treerack", []testItem{{
|
||||
runTestsFile(t, "docs/examples/scheme.treerack", []testItem{{
|
||||
title: "empty",
|
||||
}, {
|
||||
title: "a function",
|
||||
|
||||
@ -140,5 +140,10 @@ func main() {
|
||||
varName = "headCodeExported"
|
||||
}
|
||||
|
||||
fmt.Printf("package %s\n\n// generated with scripts/createhead.go\nconst %s=%s", packageName, varName, quotedCode)
|
||||
fmt.Printf(
|
||||
"package %s\n\n// generated with scripts/createhead.go\nconst %s=%s",
|
||||
packageName,
|
||||
varName,
|
||||
quotedCode,
|
||||
)
|
||||
}
|
||||
|
||||
@ -3,7 +3,7 @@ package treerack
|
||||
import "testing"
|
||||
|
||||
func TestSExpr(t *testing.T) {
|
||||
runTestsFile(t, "examples/sexpr.treerack", []testItem{{
|
||||
runTestsFile(t, "docs/examples/sexpr.treerack", []testItem{{
|
||||
title: "number",
|
||||
text: "42",
|
||||
nodes: []*Node{{
|
||||
|
||||
84
syntax.go
84
syntax.go
@ -1,3 +1,18 @@
|
||||
// Package treerack provides a parser generator for defining and interacting with arbitrary syntaxes.
|
||||
//
|
||||
// Treerack allows developers to define grammars — programmatically or via a syntax definition language
|
||||
// derivative of EBNF — and generate recursive descent parsers. These parsers process input content and produce
|
||||
// an Abstract Syntax Tree (AST) representation.
|
||||
//
|
||||
// The library supports two primary workflows:
|
||||
//
|
||||
// 1. Dynamic (Runtime): Loading or defining syntaxes programmatically at runtime to parse input immediately.
|
||||
//
|
||||
// 2. Static (Generation): Defining syntaxes during development and generating Go source code to be compiled
|
||||
// into the application.
|
||||
//
|
||||
// For detailed syntax definition rules and the command-line tool usage, please refer to the repository
|
||||
// documentation: https://code.squareroundforest.org/arpio/treerack
|
||||
package treerack
|
||||
|
||||
import (
|
||||
@ -7,14 +22,37 @@ import (
|
||||
"io"
|
||||
)
|
||||
|
||||
// if min=0&&max=0, it means min=1,max=1
|
||||
// else if max<=0, it means no max
|
||||
// else if min<=0, it means no min
|
||||
// SequenceItem represents a single element within a sequence definition, referencing another parser by name.
|
||||
//
|
||||
// Cardinality logic for SequenceItem:
|
||||
//
|
||||
// - If Min=0 and Max=0: Matches exactly once (equivalent to Min=1, Max=1).
|
||||
//
|
||||
// - If Max <= 0: Unbounded upper limit (matches Min or more times).
|
||||
//
|
||||
// - If Min <= 0: No lower limit (matches 0 to Max times).
|
||||
type SequenceItem struct {
|
||||
Name string
|
||||
Min, Max int
|
||||
|
||||
// Name is the identifier of the referenced parser definition.
|
||||
Name string
|
||||
|
||||
// Min specifies the minimum required occurrences of the item.
|
||||
Min int
|
||||
|
||||
// Max specifies the maximum accepted occurrences of the item.
|
||||
Max int
|
||||
}
|
||||
|
||||
// Syntax represents a complete grammar definition consisting of multiple named parsers.
|
||||
//
|
||||
// The lifecycle of a Syntax instance consists of three phases:
|
||||
//
|
||||
// 1. Definition: Define parsers using methods like AnyChar, Sequence, and Choice, or load a definition via
|
||||
// ReadSyntax.
|
||||
//
|
||||
// 2. Initialization: Call Init() to validate definitions, resolve references, and seal the syntax.
|
||||
//
|
||||
// 3. Execution: Use Parse() to process input or Generate() to create Go source code.
|
||||
type Syntax struct {
|
||||
registry *registry
|
||||
initialized bool
|
||||
@ -24,9 +62,15 @@ type Syntax struct {
|
||||
root definition
|
||||
}
|
||||
|
||||
// GeneratorOptions control the behavior of the Go code generator.
|
||||
type GeneratorOptions struct {
|
||||
|
||||
// PackageName sets the package name for the generated source file. Defaults to main.
|
||||
PackageName string
|
||||
Export bool
|
||||
|
||||
// Export determines whether the generated Parse function is exported (public) or unexported (private)
|
||||
// within the package.
|
||||
Export bool
|
||||
}
|
||||
|
||||
// applied in a non-type-checked way
|
||||
@ -51,11 +95,18 @@ type definition interface {
|
||||
}
|
||||
|
||||
var (
|
||||
ErrSyntaxInitialized = errors.New("syntax initialized")
|
||||
ErrNoParsersDefined = errors.New("no parsers defined")
|
||||
ErrInvalidEscapeCharacter = errors.New("invalid escape character")
|
||||
ErrMultipleRoots = errors.New("multiple roots")
|
||||
ErrInvalidSymbolName = errors.New("invalid symbol name")
|
||||
|
||||
// ErrSyntaxInitialized is returned when attempting to modify a syntax that has already been initialized.
|
||||
ErrSyntaxInitialized = errors.New("syntax initialized")
|
||||
|
||||
// ErrNoParsersDefined is returned when attempting to initialize a syntax containing no parser definitions.
|
||||
ErrNoParsersDefined = errors.New("no parsers defined")
|
||||
|
||||
// ErrMultipleRoots is returned when a syntax definition contains multiple explicit root parsers.
|
||||
ErrMultipleRoots = errors.New("multiple roots")
|
||||
|
||||
// ErrInvalidSymbolName is returned when a named parser is assigned an invalid identifier.
|
||||
ErrInvalidSymbolName = errors.New("invalid symbol name")
|
||||
)
|
||||
|
||||
func (ct CommitType) String() string {
|
||||
@ -193,6 +244,7 @@ func (s *Syntax) anyChar(name string, ct CommitType) error {
|
||||
return s.class(name, ct, true, nil, nil)
|
||||
}
|
||||
|
||||
// AnyChar registers a parser that accepts any single character (a wildcard).
|
||||
func (s *Syntax) AnyChar(name string, ct CommitType) error {
|
||||
if !isValidSymbol(name) {
|
||||
return ErrInvalidSymbolName
|
||||
@ -223,6 +275,8 @@ func (s *Syntax) class(name string, ct CommitType, not bool, chars []rune, range
|
||||
return s.sequence(name, ct, SequenceItem{Name: cname})
|
||||
}
|
||||
|
||||
// Class registers a character class parser, accepting characters defined in the specific list or ranges. If
|
||||
// 'not' is true, it matches any character *except* those defined.
|
||||
func (s *Syntax) Class(name string, ct CommitType, not bool, chars []rune, ranges [][]rune) error {
|
||||
if !isValidSymbol(name) {
|
||||
return ErrInvalidSymbolName
|
||||
@ -244,6 +298,7 @@ func (s *Syntax) charSequence(name string, ct CommitType, chars []rune) error {
|
||||
return s.sequence(name, ct|NoWhitespace, namesToSequenceItems(refs)...)
|
||||
}
|
||||
|
||||
// CharSequence registers a parser that matches a specific string literal (e.g., "foo").
|
||||
func (s *Syntax) CharSequence(name string, ct CommitType, chars []rune) error {
|
||||
if !isValidSymbol(name) {
|
||||
return ErrInvalidSymbolName
|
||||
@ -256,6 +311,7 @@ func (s *Syntax) sequence(name string, ct CommitType, items ...SequenceItem) err
|
||||
return s.register(newSequence(name, ct, items))
|
||||
}
|
||||
|
||||
// Sequence registers a parser that matches a specific order of other named parsers (defined as SequenceItems).
|
||||
func (s *Syntax) Sequence(name string, ct CommitType, items ...SequenceItem) error {
|
||||
if !isValidSymbol(name) {
|
||||
return ErrInvalidSymbolName
|
||||
@ -268,6 +324,7 @@ func (s *Syntax) choice(name string, ct CommitType, options ...string) error {
|
||||
return s.register(newChoice(name, ct, options))
|
||||
}
|
||||
|
||||
// Choice registers a parser that matches exactly one of the provided named options.
|
||||
func (s *Syntax) Choice(name string, ct CommitType, options ...string) error {
|
||||
if !isValidSymbol(name) {
|
||||
return ErrInvalidSymbolName
|
||||
@ -276,6 +333,7 @@ func (s *Syntax) Choice(name string, ct CommitType, options ...string) error {
|
||||
return s.choice(name, ct|userDefined, options...)
|
||||
}
|
||||
|
||||
// ReadSyntax loads a grammar definition from a reader using the Treerack syntax format.
|
||||
func (s *Syntax) ReadSyntax(r io.Reader) error {
|
||||
if s.initialized {
|
||||
return ErrSyntaxInitialized
|
||||
@ -302,6 +360,8 @@ func (s *Syntax) ReadSyntax(r io.Reader) error {
|
||||
return define(s, n)
|
||||
}
|
||||
|
||||
// Init validates, initializes, and seals the syntax. This method must be called exactly once before Parsing or
|
||||
// Generating.
|
||||
func (s *Syntax) Init() error {
|
||||
if s.errInitFailed != nil {
|
||||
return s.errInitFailed
|
||||
@ -359,6 +419,7 @@ func (s *Syntax) keywordParsers() []parser {
|
||||
return p
|
||||
}
|
||||
|
||||
// Generate writes Go source code implementing the parser to the provided writer.
|
||||
func (s *Syntax) Generate(o GeneratorOptions, w io.Writer) error {
|
||||
if err := s.Init(); err != nil {
|
||||
return err
|
||||
@ -454,6 +515,7 @@ func (s *Syntax) Generate(o GeneratorOptions, w io.Writer) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Parse reads from the input stream and constructs an AST based on the defined syntax.
|
||||
func (s *Syntax) Parse(r io.Reader) (*Node, error) {
|
||||
if err := s.Init(); err != nil {
|
||||
return nil, err
|
||||
|
||||
@ -7,16 +7,39 @@ import (
|
||||
"io"
|
||||
)
|
||||
|
||||
// CommitType controls how the output of a named parser is handled and represented in the resulting AST.
|
||||
type CommitType int
|
||||
|
||||
const (
|
||||
None CommitType = 0
|
||||
|
||||
// None indicates the default behavior: parsed segments are represented as named nodes in the AST.
|
||||
// Whitespace handling inherits the syntax's global settings.
|
||||
None CommitType = 0
|
||||
|
||||
// Alias treats the parser as a pass-through. Validated segments are included in the AST node of the
|
||||
// enclosing parser rather than creating a distinct node.
|
||||
Alias CommitType = 1 << iota
|
||||
|
||||
// Whitespace designates a parser (typically a character sequence) to be treated as ignored whitespace
|
||||
// throughout the input.
|
||||
Whitespace
|
||||
|
||||
// NoWhitespace prevents the automatic skipping of defined whitespace characters within specific sequences.
|
||||
NoWhitespace
|
||||
|
||||
// Keyword marks a sequence as a reserved keyword. This allows specific sequences to be protected or
|
||||
// restricted in certain contexts via the NoKeyword flag.
|
||||
Keyword
|
||||
|
||||
// NoKeyword prevents the parser from matching sequences marked as Keywords.
|
||||
NoKeyword
|
||||
|
||||
// FailPass configures the parser to propagate failure up to the enclosing parser rather than handling it
|
||||
// locally.
|
||||
FailPass
|
||||
|
||||
// Root explicitly marks the parser as the root of the syntax. By default, the last defined parser is
|
||||
// considered the root.
|
||||
Root
|
||||
|
||||
userDefined
|
||||
@ -30,30 +53,24 @@ const (
|
||||
formatIncludeComments
|
||||
)
|
||||
|
||||
// ParseError is returned when the input text doesn't match
|
||||
// the used syntax during parsing.
|
||||
// ParseError reports a failure to match the input text against the defined syntax.
|
||||
type ParseError struct {
|
||||
|
||||
// Input is the name of the input file or <input> if not
|
||||
// available.
|
||||
// Input denotes the name of the input source (e.g., filename), or "<input>" if unavailable.
|
||||
Input string
|
||||
|
||||
// Offset is the index of the right-most failing
|
||||
// token in the input text.
|
||||
// Offset is the index of the right-most token where the parse failed.
|
||||
Offset int
|
||||
|
||||
// Line tells the line index of the right-most failing
|
||||
// token in the input text.
|
||||
// Line is the zero-based line number of the failure position.
|
||||
//
|
||||
// It is zero-based, and for error reporting, it is
|
||||
// recommended to increment it by one.
|
||||
// For display purposes, increment by one.
|
||||
Line int
|
||||
|
||||
// Column tells the column index of the right-most failing
|
||||
// token in the input text.
|
||||
// Column is the zero-based column index of the failure position.
|
||||
Column int
|
||||
|
||||
// Definition tells the right-most unmatched parser definition.
|
||||
// Definition identifies the name of the specific parser definition where the match failed.
|
||||
Definition string
|
||||
}
|
||||
|
||||
@ -70,8 +87,10 @@ type builder interface {
|
||||
build(*context) ([]*Node, bool)
|
||||
}
|
||||
|
||||
// ErrInvalidUnicodeCharacter indicates that the input content contains invalid UTF-8 sequences.
|
||||
var ErrInvalidUnicodeCharacter = errors.New("invalid unicode character")
|
||||
|
||||
// Error returns the formatted failure message.
|
||||
func (pe *ParseError) Error() string {
|
||||
return fmt.Sprintf(
|
||||
"%s:%d:%d:parse failed, parsing: %s",
|
||||
|
||||
Loading…
Reference in New Issue
Block a user