documentation
This commit is contained in:
parent
dd6cdb1aac
commit
4c6c817431
4
Makefile
4
Makefile
@ -52,10 +52,12 @@ headexported.go: .build/headexported.go
|
|||||||
cp .build/headexported.go .
|
cp .build/headexported.go .
|
||||||
|
|
||||||
.build/self.go: $(sources) $(parsers) head.go headexported.go .build
|
.build/self.go: $(sources) $(parsers) head.go headexported.go .build
|
||||||
# since generator code depends on the syntax itself, we need to passes:
|
# since the generator code depends on the syntax itself, and as such influences its own output, we need two
|
||||||
|
# passes:
|
||||||
go build -o .build/treerack.current ./cmd/treerack
|
go build -o .build/treerack.current ./cmd/treerack
|
||||||
.build/treerack.current generate --export --package-name self < syntax.treerack > .build/self.go
|
.build/treerack.current generate --export --package-name self < syntax.treerack > .build/self.go
|
||||||
go fmt .build/self.go
|
go fmt .build/self.go
|
||||||
|
# we backup the original and apply the new:
|
||||||
cp internal/self/self.go .build/self.go.backup
|
cp internal/self/self.go .build/self.go.backup
|
||||||
cp .build/self.go internal/self
|
cp .build/self.go internal/self
|
||||||
# second pass:
|
# second pass:
|
||||||
|
|||||||
9
buzz.txt
9
buzz.txt
@ -1,9 +0,0 @@
|
|||||||
generator, in-process init or command line
|
|
||||||
syntax from text or defined during runtime, or combined
|
|
||||||
simple syntax with recursion
|
|
||||||
no lexer required
|
|
||||||
utf8, 8bit or custom tokens
|
|
||||||
abstract syntax tree from text of arbitrary syntax
|
|
||||||
reading from streams
|
|
||||||
context free, however support for custom tokens in the input
|
|
||||||
custom tokens for indentation built in
|
|
||||||
@ -2,49 +2,48 @@
|
|||||||
Generated with https://code.squareroundforest.org/arpio/docreflect
|
Generated with https://code.squareroundforest.org/arpio/docreflect
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import "code.squareroundforest.org/arpio/docreflect"
|
import "code.squareroundforest.org/arpio/docreflect"
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
docreflect.Register("main", "")
|
docreflect.Register("main", "")
|
||||||
docreflect.Register("main.check", "check parses input content against the provided syntax definition and fails if the input does not match.\nSyntax can be provided via a filename option or an inline string option. Input can be provided via a filename\noption, a positional argument filename, an inline string option, or piped from standard input.\n\nfunc(o, stdin, args)")
|
docreflect.Register("main.check", "check parses input content against the provided syntax definition and fails if the input does not match.\nSyntax can be provided via a filename option or an inline string option. Input can be provided via a filename\noption, a positional argument filename, an inline string option, or piped from standard input.\n\nfunc(o, stdin, args)")
|
||||||
docreflect.Register("main.checkOptions", "")
|
docreflect.Register("main.checkOptions", "")
|
||||||
docreflect.Register("main.checkOptions.Input", "Input specifies the filename of the input content to be validated.\n")
|
docreflect.Register("main.checkOptions.Input", "Input specifies the filename of the input content to be validated.\n")
|
||||||
docreflect.Register("main.checkOptions.InputString", "InputString specifies the input content as an inline string.\n")
|
docreflect.Register("main.checkOptions.InputString", "InputString specifies the input content as an inline string.\n")
|
||||||
docreflect.Register("main.checkOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
docreflect.Register("main.checkOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
||||||
docreflect.Register("main.checkOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
docreflect.Register("main.checkOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
||||||
docreflect.Register("main.checkSyntax", "checkSyntax validates a syntax definition. The syntax may be provided via a file path (using an option or a\npositional argument), an inline string, or piped from standard input.\n\nfunc(o, stdin, args)")
|
docreflect.Register("main.checkSyntax", "checkSyntax validates a syntax definition. The syntax may be provided via a file path (using an option or a\npositional argument), an inline string, or piped from standard input.\n\nfunc(o, stdin, args)")
|
||||||
docreflect.Register("main.checkSyntaxOptions", "")
|
docreflect.Register("main.checkSyntaxOptions", "")
|
||||||
docreflect.Register("main.checkSyntaxOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
docreflect.Register("main.checkSyntaxOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
||||||
docreflect.Register("main.checkSyntaxOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
docreflect.Register("main.checkSyntaxOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
||||||
docreflect.Register("main.errInvalidFilename", "")
|
docreflect.Register("main.errInvalidFilename", "")
|
||||||
docreflect.Register("main.errMultipleInputs", "")
|
docreflect.Register("main.errMultipleInputs", "")
|
||||||
docreflect.Register("main.errNoInput", "")
|
docreflect.Register("main.errNoInput", "")
|
||||||
docreflect.Register("main.generate", "generate generates Go code that can parse arbitrary input with the provided syntax, and can be used embedded\nin an application.\n\nThe syntax may be provided via a file path (using an option or a positional argument), an\ninline string, or piped from standard input.\n\nfunc(o, stdin, stdout, args)")
|
docreflect.Register("main.generate", "generate generates Go code that can parse arbitrary input with the provided syntax, and can be used embedded\nin an application.\n\nThe syntax may be provided via a file path (using an option or a positional argument), an\ninline string, or piped from standard input.\n\nfunc(o, stdin, stdout, args)")
|
||||||
docreflect.Register("main.generateOptions", "")
|
docreflect.Register("main.generateOptions", "")
|
||||||
docreflect.Register("main.generateOptions.Export", "Export determines whether the generated parse function is exported (visible outside its package).\n")
|
docreflect.Register("main.generateOptions.Export", "Export determines whether the generated parse function is exported (visible outside its package).\n")
|
||||||
docreflect.Register("main.generateOptions.PackageName", "PackageName specifies the package name for the generated code. Defaults to main.\n")
|
docreflect.Register("main.generateOptions.PackageName", "PackageName specifies the package name for the generated code. Defaults to main.\n")
|
||||||
docreflect.Register("main.generateOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
docreflect.Register("main.generateOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
||||||
docreflect.Register("main.generateOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
docreflect.Register("main.generateOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
||||||
docreflect.Register("main.init", "\nfunc()")
|
docreflect.Register("main.init", "\nfunc()")
|
||||||
docreflect.Register("main.initInput", "\nfunc(filename, stringValue, stdin, args)")
|
docreflect.Register("main.initInput", "\nfunc(filename, stringValue, stdin, args)")
|
||||||
docreflect.Register("main.main", "\nfunc()")
|
docreflect.Register("main.main", "\nfunc()")
|
||||||
docreflect.Register("main.mapNode", "\nfunc(n)")
|
docreflect.Register("main.mapNode", "\nfunc(n)")
|
||||||
docreflect.Register("main.node", "")
|
docreflect.Register("main.node", "")
|
||||||
docreflect.Register("main.node.From", "")
|
docreflect.Register("main.node.From", "")
|
||||||
docreflect.Register("main.node.Name", "")
|
docreflect.Register("main.node.Name", "")
|
||||||
docreflect.Register("main.node.Nodes", "")
|
docreflect.Register("main.node.Nodes", "")
|
||||||
docreflect.Register("main.node.Text", "")
|
docreflect.Register("main.node.Text", "")
|
||||||
docreflect.Register("main.node.To", "")
|
docreflect.Register("main.node.To", "")
|
||||||
docreflect.Register("main.noop", "\nfunc()")
|
docreflect.Register("main.noop", "\nfunc()")
|
||||||
docreflect.Register("main.show", "show input content against a provided syntax definition and outputs the resulting AST (Abstract Syntax Tree)\nin JSON format. Syntax can be provided via a filename option or an inline string option. Input can be\nprovided via a filename option, a positional argument filename, an inline string option, or piped from\nstandard input.\n\nfunc(o, stdin, stdout, args)")
|
docreflect.Register("main.show", "show input content against a provided syntax definition and outputs the resulting AST (Abstract Syntax Tree)\nin JSON format. Syntax can be provided via a filename option or an inline string option. Input can be\nprovided via a filename option, a positional argument filename, an inline string option, or piped from\nstandard input.\n\nfunc(o, stdin, stdout, args)")
|
||||||
docreflect.Register("main.showOptions", "")
|
docreflect.Register("main.showOptions", "")
|
||||||
docreflect.Register("main.showOptions.Indent", "Indent specifies a custom indentation string for the output.\n")
|
docreflect.Register("main.showOptions.Indent", "Indent specifies a custom indentation string for the output.\n")
|
||||||
docreflect.Register("main.showOptions.Input", "Input specifies the filename of the input content to be validated.\n")
|
docreflect.Register("main.showOptions.Input", "Input specifies the filename of the input content to be validated.\n")
|
||||||
docreflect.Register("main.showOptions.InputString", "InputString specifies the input content as an inline string.\n")
|
docreflect.Register("main.showOptions.InputString", "InputString specifies the input content as an inline string.\n")
|
||||||
docreflect.Register("main.showOptions.Pretty", "Pretty enables indented, human-readable output.\n")
|
docreflect.Register("main.showOptions.Pretty", "Pretty enables indented, human-readable output.\n")
|
||||||
docreflect.Register("main.showOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
docreflect.Register("main.showOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n")
|
||||||
docreflect.Register("main.showOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
docreflect.Register("main.showOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n")
|
||||||
docreflect.Register("main.version", "")
|
docreflect.Register("main.version", "")
|
||||||
}
|
}
|
||||||
@ -105,5 +105,9 @@ func show(o showOptions, stdin io.Reader, stdout io.Writer, args ...string) erro
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if _, err := stdout.Write([]byte{'\n'}); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@ -128,7 +128,7 @@ func TestShow(t *testing.T) {
|
|||||||
t.Fatal(nil)
|
t.Fatal(nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` {
|
if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" {
|
||||||
t.Fatal(out.String())
|
t.Fatal(out.String())
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@ -144,7 +144,7 @@ func TestShow(t *testing.T) {
|
|||||||
t.Fatal(nil)
|
t.Fatal(nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` {
|
if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" {
|
||||||
t.Fatal(out.String())
|
t.Fatal(out.String())
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@ -159,7 +159,7 @@ func TestShow(t *testing.T) {
|
|||||||
t.Fatal(nil)
|
t.Fatal(nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` {
|
if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" {
|
||||||
t.Fatal(out.String())
|
t.Fatal(out.String())
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@ -172,7 +172,7 @@ func TestShow(t *testing.T) {
|
|||||||
t.Fatal(nil)
|
t.Fatal(nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` {
|
if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" {
|
||||||
t.Fatal(out.String())
|
t.Fatal(out.String())
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@ -189,7 +189,7 @@ func TestShow(t *testing.T) {
|
|||||||
t.Fatal(nil)
|
t.Fatal(nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
const expect = "{\n \"name\": \"foo\",\n \"from\": 0,\n \"to\": 3,\n \"text\": \"bar\"\n}"
|
const expect = "{\n \"name\": \"foo\",\n \"from\": 0,\n \"to\": 3,\n \"text\": \"bar\"\n}\n"
|
||||||
if out.String() != expect {
|
if out.String() != expect {
|
||||||
t.Fatal(out.String())
|
t.Fatal(out.String())
|
||||||
}
|
}
|
||||||
@ -207,7 +207,7 @@ func TestShow(t *testing.T) {
|
|||||||
t.Fatal(nil)
|
t.Fatal(nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}" {
|
if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}\n" {
|
||||||
t.Fatal(out.String())
|
t.Fatal(out.String())
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@ -225,7 +225,7 @@ func TestShow(t *testing.T) {
|
|||||||
t.Fatal(nil)
|
t.Fatal(nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}" {
|
if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}\n" {
|
||||||
t.Fatal(out.String())
|
t.Fatal(out.String())
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
47
docs/examples/acalc/acalc.treerack
Normal file
47
docs/examples/acalc/acalc.treerack
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
// first define our whitespace chars:
|
||||||
|
ignore:ws = " " | [\t] | [\r] | [\n];
|
||||||
|
|
||||||
|
// define the format of input numbers. With the :nows flag we declare that we don't expect ignored spaces
|
||||||
|
// between the digits and the delimiters. We support integers, floating point numbers, and floating point
|
||||||
|
// numbers with their exponential notation. We don't support arbitrary leading zeros to avoid confusion with the
|
||||||
|
// octal representation of numbers, which is not supported here.
|
||||||
|
num:nows = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? [0-9]+)?;
|
||||||
|
|
||||||
|
// define the supported operators:
|
||||||
|
add = "+";
|
||||||
|
sub = "-";
|
||||||
|
mul = "*";
|
||||||
|
div = "/";
|
||||||
|
|
||||||
|
// let's define grouping. Any expression can be grouped. The definition of the expression can be found further
|
||||||
|
// down in the syntax document. This usage of the expression reference is also a good example for recursive
|
||||||
|
// definitions. Using the :alias flag prevents generating a separate node in the resulting AST.
|
||||||
|
group:alias = "(" expression ")";
|
||||||
|
|
||||||
|
// we group the operators by precedence. This is necessary to parse the expressions like a * b + c in a structure
|
||||||
|
// that is equivalent to (a * b) + c.
|
||||||
|
op0:alias = mul | div;
|
||||||
|
op1:alias = add | sub;
|
||||||
|
|
||||||
|
// we also define which operands can be used at which precedence level. Notice, how operand1 also allows binary0
|
||||||
|
// expressions.
|
||||||
|
operand0:alias = num | group;
|
||||||
|
operand1:alias = operand0 | binary0;
|
||||||
|
|
||||||
|
// using the prioritized operators, we can define the prioritized binary expressions. We support a + b + c, and
|
||||||
|
// not only a + b.
|
||||||
|
binary0 = operand0 (op0 operand0)+;
|
||||||
|
binary1 = operand1 (op1 operand1)+;
|
||||||
|
binary:alias = binary0 | binary1;
|
||||||
|
|
||||||
|
// let's define, what an expression can be. Notice the recursion along expression and group.
|
||||||
|
expression:alias = num | group | binary;
|
||||||
|
|
||||||
|
// finally, define the root of the parser, the result of the arithmetic expression. It can be any expression,
|
||||||
|
// but since we used the :alias flag for the expression definition, we need to add a non-alias parser that will
|
||||||
|
// represent the root of the resulting AST. This also allows us to define an "exit" token, which can be used
|
||||||
|
// to exit from the REPL loop of our application.
|
||||||
|
//
|
||||||
|
// Note that we don't need to use the :root flag here, because it is our last definition, and this means that
|
||||||
|
// it is the root parser of the syntax.
|
||||||
|
result = expression | "exit"
|
||||||
3
docs/examples/acalc/go.mod
Normal file
3
docs/examples/acalc/go.mod
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
module acalc
|
||||||
|
|
||||||
|
go 1.25.4
|
||||||
143
docs/examples/acalc/main.go
Normal file
143
docs/examples/acalc/main.go
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
var errExit = errors.New("exit")
|
||||||
|
|
||||||
|
func repl(input io.Reader, output io.Writer) {
|
||||||
|
// use buffered io, to be able to read the input line-by-line:
|
||||||
|
buf := bufio.NewReader(os.Stdin)
|
||||||
|
|
||||||
|
// our REPL loop:
|
||||||
|
for {
|
||||||
|
// print a basic prompt:
|
||||||
|
if _, err := output.Write([]byte("> ")); err != nil {
|
||||||
|
|
||||||
|
// we cannot fix it if there is an error here:
|
||||||
|
log.Fatalln(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// read the input and handle the errors:
|
||||||
|
expr, err := read(buf)
|
||||||
|
|
||||||
|
// when EOF, that means the user pressed Ctrl+D. Let's terminate the output with a conventional newline
|
||||||
|
// and exit:
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
|
output.Write([]byte{'\n'})
|
||||||
|
os.Exit(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// when errExit, that means the user entered exit:
|
||||||
|
if errors.Is(err, errExit) {
|
||||||
|
os.Exit(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// if it's a parser error, we print and continue from reading again, to allow the user to fix the
|
||||||
|
// problem:
|
||||||
|
var perr *parseError
|
||||||
|
if errors.As(err, &perr) {
|
||||||
|
log.Println(err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// in case of any other error, we don't know what's going on, so we get out of here right away:
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalln(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// if we received an expression, then we can evaluate it. We are not expecting errors here:
|
||||||
|
result := eval(expr)
|
||||||
|
|
||||||
|
// we have the result, we need to print it:
|
||||||
|
if err := print(output, result); err != nil {
|
||||||
|
|
||||||
|
// if printing fails, we don't know how to fix it, so we get out of here:
|
||||||
|
log.Fatalln(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func read(input *bufio.Reader) (*node, error) {
|
||||||
|
line, err := input.ReadString('\n')
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// expr will be of type *node, which type is defined in the generated code
|
||||||
|
expr, err := parse(bytes.NewBufferString(line))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.TrimSpace(expr.Text()) == "exit" {
|
||||||
|
return nil, errExit
|
||||||
|
}
|
||||||
|
|
||||||
|
// we know based on the syntax, that the top level node will always have a single child, either a number
|
||||||
|
// literal or a binary operation:
|
||||||
|
return expr.Nodes[0], nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// eval always returns the calculated result as a float64:
|
||||||
|
func eval(expr *node) float64 {
|
||||||
|
|
||||||
|
// we know that it's either a number or a binary operation:
|
||||||
|
var value float64
|
||||||
|
switch expr.Name {
|
||||||
|
case "num":
|
||||||
|
|
||||||
|
// the number format in our syntax is based on the JSON spec, so we can piggy-back on it for the number
|
||||||
|
// parsing. In a real application, we would need to handle the errors here anyway, even if our parser
|
||||||
|
// already validated the input:
|
||||||
|
json.Unmarshal([]byte(expr.Text()), &value)
|
||||||
|
return value
|
||||||
|
default:
|
||||||
|
|
||||||
|
// we know that the first node is either a number of a child expression:
|
||||||
|
value, expr.Nodes = eval(expr.Nodes[0]), expr.Nodes[1:]
|
||||||
|
|
||||||
|
// we don't need to track back, so we can drop the processed nodes while consuming them:
|
||||||
|
for len(expr.Nodes) > 0 {
|
||||||
|
var (
|
||||||
|
operator string
|
||||||
|
operand float64
|
||||||
|
)
|
||||||
|
|
||||||
|
operator, operand, expr.Nodes = expr.Nodes[0].Name, eval(expr.Nodes[1]), expr.Nodes[2:]
|
||||||
|
switch operator {
|
||||||
|
case "add":
|
||||||
|
value += operand
|
||||||
|
case "sub":
|
||||||
|
value -= operand
|
||||||
|
case "mul":
|
||||||
|
value *= operand
|
||||||
|
case "div":
|
||||||
|
// Go returns -Inf or +Inf on division by zero:
|
||||||
|
value /= operand
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
// print writes the result to the output in the default format of the fmt package, followed by a newline.
// It returns the error of the underlying write, if there was any.
func print(output io.Writer, result float64) error {
	if _, err := fmt.Fprintln(output, result); err != nil {
		return err
	}

	return nil
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
// for testability, we define the REPL loop in a separate function so that the test code can call it with
|
||||||
|
// in-memory buffers as input and output. Our main function calls it with the stdio handles:
|
||||||
|
repl(os.Stdin, os.Stdout)
|
||||||
|
}
|
||||||
824
docs/examples/acalc/parser.go
Normal file
824
docs/examples/acalc/parser.go
Normal file
File diff suppressed because one or more lines are too long
629
docs/manual.md
Normal file
629
docs/manual.md
Normal file
@ -0,0 +1,629 @@
|
|||||||
|
# Treerack Manual
|
||||||
|
|
||||||
|
This manual describes the primary use cases and workflows supported by Treerack.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
We assume a working installation of the standard Go tooling.
|
||||||
|
|
||||||
|
This manual relies on the treerack command-line tool. We can install it using one of the following methods.
|
||||||
|
|
||||||
|
**A. source installation (requires make):**
|
||||||
|
|
||||||
|
1. clone the repository `git clone https://code.squareroundforest.org/arpio/treerack`
|
||||||
|
2. navigate to the source directory, run: `make install`. To install it to a custom location, use the `prefix`
|
||||||
|
environment variable, e.g. run `prefix=~/.local make install`
|
||||||
|
3. verify the installation: run `treerack version` and `man treerack`
|
||||||
|
|
||||||
|
**B. via go install:**
|
||||||
|
|
||||||
|
Alternatively, we _may be able to_ install directly using the Go toolchain:
|
||||||
|
|
||||||
|
1. run `go install code.squareroundforest.org/arpio/treerack/cmd/treerack@latest`
|
||||||
|
2. verify: `treerack help`
|
||||||
|
|
||||||
|
## Hello syntax
|
||||||
|
|
||||||
|
A basic syntax definition looks like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
hello = "Hello, world!"
|
||||||
|
```
|
||||||
|
|
||||||
|
This definition matches only the exact string "Hello, world!" and nothing else. To test the validity of this
|
||||||
|
rule, run:
|
||||||
|
|
||||||
|
```
|
||||||
|
treerack check-syntax --syntax-string 'hello = "Hello, world!"'
|
||||||
|
```
|
||||||
|
|
||||||
|
If successful, the command exits silently with code 0. (We can append `&& echo ok` to confirm successful
|
||||||
|
execution).
|
||||||
|
|
||||||
|
To test the syntax against actual input content:
|
||||||
|
|
||||||
|
```
|
||||||
|
treerack check --syntax-string 'hello = "Hello, world!"' --input-string 'Hello, world!'
|
||||||
|
```
|
||||||
|
|
||||||
|
To visualize the resulting Abstract Syntax Tree (AST), use the show subcommand:
|
||||||
|
|
||||||
|
```
|
||||||
|
treerack show --syntax-string 'hello = "Hello, world!"' --input-string 'Hello, world!'
|
||||||
|
```
|
||||||
|
|
||||||
|
The output will be raw JSON:
|
||||||
|
|
||||||
|
```
|
||||||
|
{"name":"hello","from":0,"to":13,"text":"Hello, world!"}
|
||||||
|
```
|
||||||
|
|
||||||
|
For a more readable output, add the --pretty flag:
|
||||||
|
|
||||||
|
```
|
||||||
|
treerack show --pretty --syntax-string 'hello = "Hello, world!"' --input-string 'Hello, world!'
|
||||||
|
```
|
||||||
|
|
||||||
|
...then the output will look like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"name": "hello",
|
||||||
|
"from": 0,
|
||||||
|
"to": 13,
|
||||||
|
"text": "Hello, world!"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Handling errors
|
||||||
|
|
||||||
|
If our syntax definition is invalid, check-syntax will fail:
|
||||||
|
|
||||||
|
```
|
||||||
|
treerack check-syntax --syntax-string 'foo = bar'
|
||||||
|
```
|
||||||
|
|
||||||
|
The above command will fail because the parser called foo references an undefined parser bar.
|
||||||
|
|
||||||
|
We can use check or show to detect when the input content does not match a valid syntax. Using the hello syntax,
|
||||||
|
we can try the following:
|
||||||
|
|
||||||
|
```
|
||||||
|
treerack check --syntax-string 'hello = "Hello, world!"' --input-string 'Hi!'
|
||||||
|
```
|
||||||
|
|
||||||
|
It will show that parsing the input failed and that it failed while using the parser hello.
|
||||||
|
|
||||||
|
## Basic syntax - An arithmetic calculator
|
||||||
|
|
||||||
|
In this section, we will build a basic arithmetic calculator. It will read a line from standard input, parse it
|
||||||
|
as an arithmetic expression, compute the result, and print it—effectively creating a REPL (Read-Eval-Print
|
||||||
|
Loop).
|
||||||
|
|
||||||
|
We will support addition +, subtraction -, multiplication *, division /, and grouping with parentheses ().
|
||||||
|
|
||||||
|
acalc.treerack:
|
||||||
|
|
||||||
|
```
|
||||||
|
// Define whitespace characters.
|
||||||
|
// The :ws flag marks this as the global whitespace handler.
|
||||||
|
ignore:ws = " " | [\t] | [\r] | [\n];
|
||||||
|
|
||||||
|
// Define the number format.
|
||||||
|
//
|
||||||
|
// The :nows flag ensures we do not skip whitespace *inside* the number token. We support integers, floats, and
|
||||||
|
// scientific notation (e.g., 1.5e3). Arbitrary leading zeros are disallowed to prevent confusion with octal
|
||||||
|
// literals.
|
||||||
|
num:nows = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? [0-9]+)?;
|
||||||
|
|
||||||
|
// define the supported operators:
|
||||||
|
add = "+";
|
||||||
|
sub = "-";
|
||||||
|
mul = "*";
|
||||||
|
div = "/";
|
||||||
|
|
||||||
|
// Grouping logic.
|
||||||
|
//
|
||||||
|
// Expressions can be enclosed in parentheses. This references 'expression', which is defined later,
|
||||||
|
// demonstrating recursive definitions. The :alias flag prevents 'group' from creating its own node in the AST;
|
||||||
|
// only the child 'expression' will appear.
|
||||||
|
group:alias = "(" expression ")";
|
||||||
|
|
||||||
|
// Operator Precedence.
|
||||||
|
//
|
||||||
|
// We group operators by precedence levels to ensure correct order of operations.
|
||||||
|
//
|
||||||
|
// Level 0 (High): Multiplication/Division
|
||||||
|
op0:alias = mul | div;
|
||||||
|
|
||||||
|
// Level 1 (Low): Addition/Subtraction
|
||||||
|
op1:alias = add | sub;
|
||||||
|
|
||||||
|
// Operands for each precedence level.
|
||||||
|
//
|
||||||
|
// operand0 can be a raw number or a grouped expression.
|
||||||
|
operand0:alias = num | group;
|
||||||
|
|
||||||
|
// operand1 can be a higher-precedence operand or a completed binary0 operation.
|
||||||
|
operand1:alias = operand0 | binary0;
|
||||||
|
|
||||||
|
// Binary Expressions.
|
||||||
|
//
|
||||||
|
// We define these hierarchically. 'binary0' handles high-precedence operations (mul/div).
|
||||||
|
binary0 = operand0 (op0 operand0)+;
|
||||||
|
binary1 = operand1 (op1 operand1)+;
|
||||||
|
binary:alias = binary0 | binary1;
|
||||||
|
|
||||||
|
// The generalized Expression.
|
||||||
|
//
|
||||||
|
// An expression is either a raw number, a group, or a binary operation.
|
||||||
|
expression:alias = num | group | binary;
|
||||||
|
|
||||||
|
// Root Definition.
|
||||||
|
//
|
||||||
|
// The final result is either a valid expression or the "exit" command. Since 'expression' is an alias, we need
|
||||||
|
// a concrete root parser to anchor the AST. Note: The :root flag is optional here because this is the last
|
||||||
|
// definition in the file.
|
||||||
|
result = expression | "exit"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Testing the syntax
|
||||||
|
|
||||||
|
#### 1. Simple number
|
||||||
|
|
||||||
|
```
|
||||||
|
treerack show --pretty --syntax acalc.treerack --input-string 42
|
||||||
|
```
|
||||||
|
|
||||||
|
Output:
|
||||||
|
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"name": "result",
|
||||||
|
"from": 0,
|
||||||
|
"to": 2,
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"name": "num",
|
||||||
|
"from": 0,
|
||||||
|
"to": 2,
|
||||||
|
"text": "42"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2. Basic operation
|
||||||
|
|
||||||
|
```
|
||||||
|
treerack show --pretty --syntax acalc.treerack --input-string "42 + 24"
|
||||||
|
```
|
||||||
|
|
||||||
|
Output:
|
||||||
|
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"name": "result",
|
||||||
|
"from": 0,
|
||||||
|
"to": 7,
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"name": "binary1",
|
||||||
|
"from": 0,
|
||||||
|
"to": 7,
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"name": "num",
|
||||||
|
"from": 0,
|
||||||
|
"to": 2,
|
||||||
|
"text": "42"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "add",
|
||||||
|
"from": 3,
|
||||||
|
"to": 4,
|
||||||
|
"text": "+"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "num",
|
||||||
|
"from": 5,
|
||||||
|
"to": 7,
|
||||||
|
"text": "24"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3. Precedence check
|
||||||
|
|
||||||
|
```
|
||||||
|
treerack show --pretty --syntax acalc.treerack --input-string "42 + 24 * 2"
|
||||||
|
```
|
||||||
|
|
||||||
|
Output:
|
||||||
|
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"name": "result",
|
||||||
|
"from": 0,
|
||||||
|
"to": 11,
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"name": "binary1",
|
||||||
|
"from": 0,
|
||||||
|
"to": 11,
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"name": "num",
|
||||||
|
"from": 0,
|
||||||
|
"to": 2,
|
||||||
|
"text": "42"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "add",
|
||||||
|
"from": 3,
|
||||||
|
"to": 4,
|
||||||
|
"text": "+"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "binary0",
|
||||||
|
"from": 5,
|
||||||
|
"to": 11,
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"name": "num",
|
||||||
|
"from": 5,
|
||||||
|
"to": 7,
|
||||||
|
"text": "24"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "mul",
|
||||||
|
"from": 8,
|
||||||
|
"to": 9,
|
||||||
|
"text": "*"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "num",
|
||||||
|
"from": 10,
|
||||||
|
"to": 11,
|
||||||
|
"text": "2"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 4. Grouping override
|
||||||
|
|
||||||
|
```
|
||||||
|
treerack show --pretty --syntax acalc.treerack --input-string "(42 + 24) * 2"
|
||||||
|
```
|
||||||
|
|
||||||
|
Notice how the 'group' alias node is not present, but now the expression of the addition is a factor in the
|
||||||
|
multiplication:
|
||||||
|
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"name": "result",
|
||||||
|
"from": 0,
|
||||||
|
"to": 13,
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"name": "binary0",
|
||||||
|
"from": 0,
|
||||||
|
"to": 13,
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"name": "binary1",
|
||||||
|
"from": 1,
|
||||||
|
"to": 8,
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"name": "num",
|
||||||
|
"from": 1,
|
||||||
|
"to": 3,
|
||||||
|
"text": "42"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "add",
|
||||||
|
"from": 4,
|
||||||
|
"to": 5,
|
||||||
|
"text": "+"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "num",
|
||||||
|
"from": 6,
|
||||||
|
"to": 8,
|
||||||
|
"text": "24"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "mul",
|
||||||
|
"from": 10,
|
||||||
|
"to": 11,
|
||||||
|
"text": "*"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "num",
|
||||||
|
"from": 12,
|
||||||
|
"to": 13,
|
||||||
|
"text": "2"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Generator - Implementing the calculator
|
||||||
|
|
||||||
|
We will now generate the Go parser code and integrate it into a CLI application.
|
||||||
|
|
||||||
|
Initialize the project:
|
||||||
|
|
||||||
|
```
|
||||||
|
go mod init acalc && go mod tidy
|
||||||
|
```
|
||||||
|
|
||||||
|
Generate the parser:
|
||||||
|
|
||||||
|
```
|
||||||
|
treerack generate --syntax acalc.treerack > parser.go
|
||||||
|
```
|
||||||
|
|
||||||
|
Implement the application logic in main.go.
|
||||||
|
|
||||||
|
main.go:
|
||||||
|
|
||||||
|
```
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
var errExit = errors.New("exit")
|
||||||
|
|
||||||
|
// repl runs the Read-Eval-Print Loop.
|
||||||
|
func repl(input io.Reader, output io.Writer) {
|
||||||
|
|
||||||
|
// use buffered io, to be able to read the input line-by-line:
|
||||||
|
buf := bufio.NewReader(input)
|
||||||
|
|
||||||
|
// our REPL loop:
|
||||||
|
for {
|
||||||
|
// print a basic prompt:
|
||||||
|
if _, err := output.Write([]byte("> ")); err != nil {
|
||||||
|
log.Fatalln(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// read the input and handle the errors:
|
||||||
|
expr, err := read(buf)
|
||||||
|
|
||||||
|
// Handle EOF (Ctrl+D)
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
|
output.Write([]byte{'\n'})
|
||||||
|
os.Exit(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle explicit exit command
|
||||||
|
if errors.Is(err, errExit) {
|
||||||
|
os.Exit(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle parser errors (allow user to retry)
|
||||||
|
var perr *parseError
|
||||||
|
if errors.As(err, &perr) {
|
||||||
|
log.Println(err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalln(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Evaluate and print
|
||||||
|
result := eval(expr)
|
||||||
|
if err := print(output, result); err != nil {
|
||||||
|
log.Fatalln(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func read(input *bufio.Reader) (*node, error) {
|
||||||
|
line, err := input.ReadString('\n')
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the line using the generated parser
|
||||||
|
expr, err := parse(bytes.NewBufferString(line))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.TrimSpace(expr.Text()) == "exit" {
|
||||||
|
return nil, errExit
|
||||||
|
}
|
||||||
|
|
||||||
|
// Based on our syntax, the root node always has exactly one child:
|
||||||
|
// either a number or a binary operation.
|
||||||
|
return expr.Nodes[0], nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// eval always returns the calculated result as a float64:
|
||||||
|
func eval(expr *node) float64 {
|
||||||
|
var value float64
|
||||||
|
switch expr.Name {
|
||||||
|
case "num":
|
||||||
|
|
||||||
|
// the number format in our syntax is based on the JSON spec, so we can piggy-back on it for the number
|
||||||
|
// parsing. In a real application, we would need to handle the errors here anyway, even if our parser
|
||||||
|
// already validated the input:
|
||||||
|
json.Unmarshal([]byte(expr.Text()), &value)
|
||||||
|
return value
|
||||||
|
default:
|
||||||
|
|
||||||
|
// Handle binary expressions (recursively)
|
||||||
|
// Format: Operand [Operator Operand]...
|
||||||
|
value, expr.Nodes = eval(expr.Nodes[0]), expr.Nodes[1:]
|
||||||
|
for len(expr.Nodes) > 0 {
|
||||||
|
var (
|
||||||
|
operator string
|
||||||
|
operand float64
|
||||||
|
)
|
||||||
|
|
||||||
|
operator, operand, expr.Nodes = expr.Nodes[0].Name, eval(expr.Nodes[1]), expr.Nodes[2:]
|
||||||
|
switch operator {
|
||||||
|
case "add":
|
||||||
|
value += operand
|
||||||
|
case "sub":
|
||||||
|
value -= operand
|
||||||
|
case "mul":
|
||||||
|
value *= operand
|
||||||
|
case "div":
|
||||||
|
value /= operand // Go handles division by zero as ±Inf
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
func print(output io.Writer, result float64) error {
|
||||||
|
_, err := fmt.Fprintln(output, result)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
// for testability, we define the REPL loop in a separate function so that the test code can call it with
|
||||||
|
// in-memory buffers as input and output. Our main function calls it with the stdio handles:
|
||||||
|
repl(os.Stdin, os.Stdout)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Running the calculator
|
||||||
|
|
||||||
|
Our arithmetic calculator is now ready. We can run it via `go run .`. An example session may look like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ go run .
|
||||||
|
> (42 + 24) * 2
|
||||||
|
132
|
||||||
|
> 42 + 24 * 2
|
||||||
|
90
|
||||||
|
> 1 + 2 + 3
|
||||||
|
6
|
||||||
|
> exit
|
||||||
|
```
|
||||||
|
|
||||||
|
We can find the source files for this example here: [./examples/acalc](./examples/acalc).
|
||||||
|
|
||||||
|
## Important Note: Unescaping
|
||||||
|
|
||||||
|
Treerack does not automatically handle escape sequences (e.g., converting \n to a literal newline). If our
|
||||||
|
syntax supports escaped characters—common in string literals—the user code is responsible for "unescaping" the
|
||||||
|
raw text from the AST node.
|
||||||
|
|
||||||
|
This is analogous to how we needed to parse the numbers in the calculator example to convert the string
|
||||||
|
representation of a number into a Go float64.
|
||||||
|
|
||||||
|
## Programmatically loading syntaxes
|
||||||
|
|
||||||
|
While generating static code via treerack generate is the recommended approach, we can also load definitions
|
||||||
|
dynamically at runtime.
|
||||||
|
|
||||||
|
```
|
||||||
|
package parser
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"code.squareroundforest.org/arpio/treerack"
|
||||||
|
)
|
||||||
|
|
||||||
|
func initAndParse(syntax, content io.Reader) (*treerack.Node, error) {
|
||||||
|
s := &treerack.Syntax{}
|
||||||
|
if err := s.ReadSyntax(syntax); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := s.Init(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return s.Parse(content)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Caution: Be mindful of security implications when loading syntax definitions from untrusted sources.
|
||||||
|
|
||||||
|
## Programmatically defining syntaxes
|
||||||
|
|
||||||
|
In rare cases where a syntax must be constructed computationally, we can define rules via the Go API:
|
||||||
|
|
||||||
|
```
|
||||||
|
package parser
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"code.squareroundforest.org/arpio/treerack"
|
||||||
|
)
|
||||||
|
|
||||||
|
func initAndParse(content io.Reader) (*treerack.Node, error) {
|
||||||
|
s := &treerack.Syntax{}
|
||||||
|
|
||||||
|
// whitespace:
|
||||||
|
s.Class("whitespace-chars", treerack.Alias, false, []rune{' ', '\t', '\r', '\n'}, nil)
|
||||||
|
s.Choice("whitespace", treerack.Whitespace, "whitespace-chars")
|
||||||
|
|
||||||
|
s.Class("digit", treerack.Alias, false, nil, [][]rune{{'0', '9'}})
|
||||||
|
s.Sequence("number", treerack.NoWhitespace, treerack.SequenceItem{Name: "digit", Min: 1})
|
||||||
|
s.Class("operator", treerack.None, false, []rune{'+', '-'}, nil)
|
||||||
|
s.Sequence(
|
||||||
|
"expression",
|
||||||
|
treerack.Root,
|
||||||
|
treerack.SequenceItem{Name: "number"},
|
||||||
|
treerack.SequenceItem{Name: "operator"},
|
||||||
|
treerack.SequenceItem{Name: "number"},
|
||||||
|
)
|
||||||
|
|
||||||
|
if err := s.Init(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return s.Parse(content)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
We have demonstrated how to use the Treerack tool to define, test, and implement a parser. We recommend the
|
||||||
|
following workflow:
|
||||||
|
|
||||||
|
1. draft: define a syntax in a .treerack file.
|
||||||
|
2. verify: use `treerack check` and `treerack show` to validate building blocks incrementally.
|
||||||
|
3. generate: use `treerack generate` to create embeddable Go code.
|
||||||
|
|
||||||
|
**Links:**
|
||||||
|
|
||||||
|
- the detailed documentation of the treerack definition language: [./syntax.md](./syntax.md)
|
||||||
|
- treerack command help: [../cmd/treerack/readme.md](../cmd/treerack/readme.md) or, if the command is installed,
|
||||||
|
`man treerack`, or `path/to/treerack help`
|
||||||
|
- the arithmetic calculator example: [./examples/acalc](./examples/acalc).
|
||||||
|
- additional examples: [./examples](./examples)
|
||||||
|
|
||||||
|
Happy parsing!
|
||||||
121
docs/syntax.md
Normal file
121
docs/syntax.md
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
# Treerack Syntax Definition Language
|
||||||
|
|
||||||
|
The Treerack library uses a custom grammar description language derived from EBNF (Extended Backus-Naur Form).
|
||||||
|
It allows for the concise definition of recursive descent parsers.
|
||||||
|
|
||||||
|
A syntax file consists of a series of Production Rules (definitions), terminated by semicolons.
|
||||||
|
|
||||||
|
## Production Rules
|
||||||
|
|
||||||
|
A rule assigns a name to a pattern expression. Rules may include optional flags to modify the parser's behavior
|
||||||
|
or the resulting AST (Abstract Syntax Tree).
|
||||||
|
|
||||||
|
```
|
||||||
|
RuleName = Expression;
|
||||||
|
RuleName:flag1:flag2 = Expression;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Flags
|
||||||
|
|
||||||
|
Flags are appended to the rule name, separated by colons. They control AST generation, whitespace handling, and
|
||||||
|
error propagation.
|
||||||
|
|
||||||
|
- `alias`: Transparent Node. The rule validates input but does not create its own node in the AST. Children
|
||||||
|
nodes (if any) are attached to the parent of this rule.
|
||||||
|
- `ws`: Global Whitespace. Marks this rule as the designated whitespace handler. The parser will attempt to
|
||||||
|
match (and discard) this rule between tokens throughout the entire syntax.
|
||||||
|
- `nows`: No Whitespace. Disables automatic whitespace skipping inside this rule. Useful for defining tokens
|
||||||
|
like string literals where spaces are significant.
|
||||||
|
- `root`: Entry Point. Explicitly marks the rule as the starting point of the syntax. If omitted, the last
|
||||||
|
defined rule is implied to be the root.
|
||||||
|
- `kw`: Keyword. Marks the content as a reserved keyword.
|
||||||
|
- `nokw`: No Keyword. Prevents the rule from matching text that matches a defined kw rule. Essential for
|
||||||
|
distinguishing identifiers from keywords (e.g., ensuring var is not parsed as a variable name).
|
||||||
|
- `failpass`: Pass Failure. If this rule fails to parse, the error is reported as a failure of the parent rule,
|
||||||
|
not this specific rule.
|
||||||
|
|
||||||
|
## Expressions
|
||||||
|
|
||||||
|
Expressions define the structure of the text to be parsed. They are composed of terminals, sequences, choices,
|
||||||
|
and quantifiers.
|
||||||
|
|
||||||
|
## Terminals
|
||||||
|
|
||||||
|
Terminals match specific characters or strings in the input.
|
||||||
|
|
||||||
|
- `"abc"` (string): Matches an exact sequence of characters.
|
||||||
|
- `.` (any char): Matches any single character (wildcard).
|
||||||
|
- `[123]`, `[a-z]`, `[123a-z]` (class): Matches a single character from a set or range.
|
||||||
|
- `[^123]`, `[^a-z]`, `[^123a-z]` (not class): Matches any single character not in the set.
|
||||||
|
|
||||||
|
## Quantifiers
|
||||||
|
|
||||||
|
Quantifiers determine how many times an item must match. They are placed immediately after the item they modify.
|
||||||
|
|
||||||
|
- `?`: Optional (Zero or one).
|
||||||
|
- `*`: Zero or more.
|
||||||
|
- `+`: One or more.
|
||||||
|
- `{n}`: Exact count. Matches exactly n times.
|
||||||
|
- `{n,}`: At least. Matches n or more times.
|
||||||
|
- `{,m}`: At most. Matches between 0 and m times.
|
||||||
|
- `{n,m}`: Range. Matches between n and m times.
|
||||||
|
|
||||||
|
## Composites
|
||||||
|
|
||||||
|
Complex patterns are built by combining terminals and other rules.
|
||||||
|
|
||||||
|
### 1. Sequences
|
||||||
|
|
||||||
|
Items written consecutively are matched in order.
|
||||||
|
|
||||||
|
```
|
||||||
|
// Matches "A", then "B", then "C"
|
||||||
|
MySequence = "A" "B" "C";
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Grouping
|
||||||
|
|
||||||
|
Parentheses (...) group items together, allowing quantifiers to apply to the entire group.
|
||||||
|
|
||||||
|
```
|
||||||
|
// Matches "AB", "ABAB", "ABABAB"...
|
||||||
|
MyGroup = ("A" "B")+;
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Choices
|
||||||
|
|
||||||
|
The pipe | character represents a choice between alternatives.
|
||||||
|
|
||||||
|
The parser evaluates all provided options against the input at the current position and selects the best match
|
||||||
|
based on the following priority rules:
|
||||||
|
|
||||||
|
1. _Longest Match_: The option that consumes the largest number of characters takes priority. This eliminates the
|
||||||
|
need to manually order specific matches before general ones (e.g., "integer" will always be chosen over "int" if
|
||||||
|
the input supports it, regardless of their order in the definition).
|
||||||
|
2. _First Definition Wins_: If multiple options consume the exact same number of characters, the option defined
|
||||||
|
first (left-most) in the list takes priority.
|
||||||
|
|
||||||
|
```
|
||||||
|
// Longest match wins automatically:
|
||||||
|
// Input "integer" is matched by 'type', even though "int" comes first.
|
||||||
|
type = "int" | "integer";
|
||||||
|
|
||||||
|
// Tie-breaker rule:
|
||||||
|
// If input is "foo", both options match 3 characters.
|
||||||
|
// Because 'keyword' is defined first, it takes priority over 'identifier'.
|
||||||
|
// (Use :kw and :nokw to control such situations, when it applies.)
|
||||||
|
content = keyword | identifier;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Comments
|
||||||
|
|
||||||
|
Comments follow C-style syntax and are ignored by the definition parser.
|
||||||
|
|
||||||
|
- Line comments: Start with // and end at the newline.
|
||||||
|
- Block comments: Enclosed in /* ... */.
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
- [JSON](examples/json.treerack)
|
||||||
|
- [Scheme](examples/scheme.treerack)
|
||||||
|
- [Treerack (itself)](../syntax.treerack)
|
||||||
30
escape.go
30
escape.go
@ -61,33 +61,3 @@ func unescapeChar(c rune) rune {
|
|||||||
return c
|
return c
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func unescape(escape rune, banned, chars []rune) ([]rune, error) {
|
|
||||||
var (
|
|
||||||
unescaped []rune
|
|
||||||
escaped bool
|
|
||||||
)
|
|
||||||
|
|
||||||
for _, ci := range chars {
|
|
||||||
if escaped {
|
|
||||||
unescaped = append(unescaped, unescapeChar(ci))
|
|
||||||
escaped = false
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
switch {
|
|
||||||
case ci == escape:
|
|
||||||
escaped = true
|
|
||||||
case runesContain(banned, ci):
|
|
||||||
return nil, ErrInvalidEscapeCharacter
|
|
||||||
default:
|
|
||||||
unescaped = append(unescaped, ci)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if escaped {
|
|
||||||
return nil, ErrInvalidEscapeCharacter
|
|
||||||
}
|
|
||||||
|
|
||||||
return unescaped, nil
|
|
||||||
}
|
|
||||||
|
|||||||
@ -2,33 +2,8 @@ package treerack
|
|||||||
|
|
||||||
import "testing"
|
import "testing"
|
||||||
|
|
||||||
func TestUnescape(t *testing.T) {
|
|
||||||
t.Run("char should be escaped", func(t *testing.T) {
|
|
||||||
if _, err := unescape('\\', []rune{'a'}, []rune{'a'}); err == nil {
|
|
||||||
t.Error("failed to fail")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("finished with escape char", func(t *testing.T) {
|
|
||||||
if _, err := unescape('\\', []rune{'a'}, []rune{'b', '\\'}); err == nil {
|
|
||||||
t.Error("failed to fail")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("unescapes", func(t *testing.T) {
|
|
||||||
u, err := unescape('\\', []rune{'a'}, []rune{'b', '\\', 'a'})
|
|
||||||
if err != nil {
|
|
||||||
t.Error(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if string(u) != "ba" {
|
|
||||||
t.Error("unescape failed")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestEscape(t *testing.T) {
|
func TestEscape(t *testing.T) {
|
||||||
|
t.Skip()
|
||||||
const (
|
const (
|
||||||
banned = "\b\f\n\r\t\v"
|
banned = "\b\f\n\r\t\v"
|
||||||
unescaped = "\b\f\n\r\t\v"
|
unescaped = "\b\f\n\r\t\v"
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@ -729,10 +729,11 @@ func (c *context) finalizeParse(root parser) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Node struct {
|
type Node struct {
|
||||||
Name string
|
Name string
|
||||||
Nodes []*Node
|
Nodes []*Node
|
||||||
From, To int
|
From int
|
||||||
tokens []rune
|
To int
|
||||||
|
tokens []rune
|
||||||
}
|
}
|
||||||
|
|
||||||
func (n *Node) Tokens() []rune {
|
func (n *Node) Tokens() []rune {
|
||||||
|
|||||||
@ -285,7 +285,7 @@ func jsonTreeToJSON(n *Node) (interface{}, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestJSON(t *testing.T) {
|
func TestJSON(t *testing.T) {
|
||||||
runTestsFile(t, "examples/json.treerack", []testItem{{
|
runTestsFile(t, "docs/examples/json.treerack", []testItem{{
|
||||||
title: "true",
|
title: "true",
|
||||||
text: "true",
|
text: "true",
|
||||||
node: &Node{
|
node: &Node{
|
||||||
@ -509,7 +509,7 @@ func TestRandomJSON(t *testing.T) {
|
|||||||
|
|
||||||
buf := bytes.NewBuffer(b)
|
buf := bytes.NewBuffer(b)
|
||||||
|
|
||||||
s, err := openSyntaxFile("examples/json.treerack")
|
s, err := openSyntaxFile("docs/examples/json.treerack")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
return
|
return
|
||||||
|
|||||||
@ -3,7 +3,7 @@ package treerack
|
|||||||
import "testing"
|
import "testing"
|
||||||
|
|
||||||
func TestKeyVal(t *testing.T) {
|
func TestKeyVal(t *testing.T) {
|
||||||
runTestsFile(t, "examples/keyval.treerack", []testItem{{
|
runTestsFile(t, "docs/examples/keyval.treerack", []testItem{{
|
||||||
title: "empty",
|
title: "empty",
|
||||||
}, {
|
}, {
|
||||||
title: "a comment",
|
title: "a comment",
|
||||||
|
|||||||
@ -4,20 +4,53 @@ import "testing"
|
|||||||
|
|
||||||
func TestKeyword(t *testing.T) {
|
func TestKeyword(t *testing.T) {
|
||||||
const syntax = `
|
const syntax = `
|
||||||
keywords:kw = "foo" | "bar";
|
space:ws = " ";
|
||||||
symbol:nokw = [a-z]+;
|
keyword:kw = "foo" | "bar";
|
||||||
|
symbol:nokw:nows = [a-z]+;
|
||||||
|
doc:root = (keyword | symbol)*;
|
||||||
`
|
`
|
||||||
|
|
||||||
runTests(t, syntax, []testItem{{
|
runTests(t, syntax, []testItem{{
|
||||||
title: "keyword",
|
|
||||||
text: "foo",
|
|
||||||
fail: true,
|
|
||||||
}, {
|
|
||||||
title: "not keyword",
|
title: "not keyword",
|
||||||
text: "baz",
|
text: "baz",
|
||||||
ignorePosition: true,
|
ignorePosition: true,
|
||||||
node: &Node{
|
node: &Node{
|
||||||
Name: "symbol",
|
Name: "doc",
|
||||||
|
Nodes: []*Node{{
|
||||||
|
Name: "symbol",
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
title: "keyword",
|
||||||
|
text: "foo",
|
||||||
|
ignorePosition: true,
|
||||||
|
node: &Node{
|
||||||
|
Name: "doc",
|
||||||
|
Nodes: []*Node{{
|
||||||
|
Name: "keyword",
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
title: "mixed",
|
||||||
|
text: "foo bar baz bar foo baz bar",
|
||||||
|
ignorePosition: true,
|
||||||
|
node: &Node{
|
||||||
|
Name: "doc",
|
||||||
|
Nodes: []*Node{{
|
||||||
|
Name: "keyword",
|
||||||
|
}, {
|
||||||
|
Name: "keyword",
|
||||||
|
}, {
|
||||||
|
Name: "symbol",
|
||||||
|
}, {
|
||||||
|
Name: "keyword",
|
||||||
|
}, {
|
||||||
|
Name: "keyword",
|
||||||
|
}, {
|
||||||
|
Name: "symbol",
|
||||||
|
}, {
|
||||||
|
Name: "keyword",
|
||||||
|
}},
|
||||||
},
|
},
|
||||||
}})
|
}})
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,7 +3,7 @@ package treerack
|
|||||||
import "testing"
|
import "testing"
|
||||||
|
|
||||||
func TestMML(t *testing.T) {
|
func TestMML(t *testing.T) {
|
||||||
s, err := openSyntaxFile("examples/mml.treerack")
|
s, err := openSyntaxFile("docs/examples/mml.treerack")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
return
|
return
|
||||||
|
|||||||
@ -5,7 +5,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestMMLExp2(t *testing.T) {
|
func TestMMLExp2(t *testing.T) {
|
||||||
s, err := openSyntaxFile("examples/mml-exp2.treerack")
|
s, err := openSyntaxFile("docs/examples/mml-exp2.treerack")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
return
|
return
|
||||||
|
|||||||
@ -5,7 +5,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestMMLExp3(t *testing.T) {
|
func TestMMLExp3(t *testing.T) {
|
||||||
s, err := openSyntaxFile("examples/mml-exp3.treerack")
|
s, err := openSyntaxFile("docs/examples/mml-exp3.treerack")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
return
|
return
|
||||||
|
|||||||
@ -9,7 +9,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestMMLExp(t *testing.T) {
|
func TestMMLExp(t *testing.T) {
|
||||||
s, err := openSyntaxFile("examples/mml-exp.treerack")
|
s, err := openSyntaxFile("docs/examples/mml-exp.treerack")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
return
|
return
|
||||||
@ -2987,7 +2987,7 @@ func TestMMLFile(t *testing.T) {
|
|||||||
|
|
||||||
const n = 180
|
const n = 180
|
||||||
|
|
||||||
s, err := openSyntaxFile("examples/mml-exp.treerack")
|
s, err := openSyntaxFile("docs/examples/mml-exp.treerack")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
return
|
return
|
||||||
@ -2995,7 +2995,7 @@ func TestMMLFile(t *testing.T) {
|
|||||||
|
|
||||||
s.Init()
|
s.Init()
|
||||||
|
|
||||||
f, err := os.Open("examples/test.mml")
|
f, err := os.Open("docs/examples/test.mml")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
return
|
return
|
||||||
|
|||||||
26
nodehead.go
26
nodehead.go
@ -2,21 +2,39 @@ package treerack
|
|||||||
|
|
||||||
import "fmt"
|
import "fmt"
|
||||||
|
|
||||||
|
// Node represents a distinct element in the resulting Abstract Syntax Tree (AST) following a successful parse.
|
||||||
|
// Every named parser that is not an Alias or Whitespace yields a Node.
|
||||||
type Node struct {
|
type Node struct {
|
||||||
Name string
|
|
||||||
Nodes []*Node
|
// Name is the identifier of the parser that generated this node.
|
||||||
From, To int
|
Name string
|
||||||
tokens []rune
|
|
||||||
|
// Nodes contains the child nodes representing the substructures of this node.
|
||||||
|
Nodes []*Node
|
||||||
|
|
||||||
|
// From is the inclusive character offset of the starting position in the input stream.
|
||||||
|
From int
|
||||||
|
|
||||||
|
// To is the exclusive character offset of the ending position in the input stream.
|
||||||
|
To int
|
||||||
|
|
||||||
|
tokens []rune
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tokens returns the raw slice of runes from the input stream represented by this node.
|
||||||
|
//
|
||||||
|
// Note: This returns a reference to the underlying buffer, not a copy. It should not be modified.
|
||||||
func (n *Node) Tokens() []rune {
|
func (n *Node) Tokens() []rune {
|
||||||
return n.tokens
|
return n.tokens
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// String returns the string representation of the node, including its name, position range (From/To), and the
|
||||||
|
// captured text content.
|
||||||
func (n *Node) String() string {
|
func (n *Node) String() string {
|
||||||
return fmt.Sprintf("%s:%d:%d:%s", n.Name, n.From, n.To, n.Text())
|
return fmt.Sprintf("%s:%d:%d:%s", n.Name, n.From, n.To, n.Text())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Text returns the actual string segment from the input stream represented by this node.
|
||||||
func (n *Node) Text() string {
|
func (n *Node) Text() string {
|
||||||
return string(n.Tokens()[n.From:n.To])
|
return string(n.Tokens()[n.From:n.To])
|
||||||
}
|
}
|
||||||
|
|||||||
66
readme.md
66
readme.md
@ -1,9 +1,65 @@
|
|||||||
# treerack
|
# treerack
|
||||||
|
|
||||||
[WIP] A generic parser generator for Go.
|
**A parser generator for Go.**
|
||||||
|
|
||||||
### Examples
|
Treerack defines and generates recursive descent parsers for arbitrary syntaxes, processing input content into
|
||||||
|
its Abstract Syntax Tree (AST) representation. It utilizes a custom syntax definition format derived from EBNF
|
||||||
|
(Extended Backus-Naur Form), allowing for clear and concise grammar descriptions.
|
||||||
|
|
||||||
- JSON: https://code.squareroundforest.org/arpio/treerack/blob/master/examples/json.treerack
|
## Examples
|
||||||
- Scheme: https://code.squareroundforest.org/arpio/treerack/blob/master/examples/scheme.treerack
|
|
||||||
- Treerack (itself): https://code.squareroundforest.org/arpio/treerack/blob/master/syntax.treerack
|
- **JSON**: [docs/examples/json.treerack](docs/examples/json.treerack)
|
||||||
|
- **Scheme**: [docs/examples/scheme.treerack](docs/examples/scheme.treerack)
|
||||||
|
- **Treerack (self-definition)**: [syntax.treerack](syntax.treerack)
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Treerack operates without a separate lexing phase, parsing character streams directly to produce an AST. The
|
||||||
|
syntax language supports recursive references, enabling the definition of context-free grammars.
|
||||||
|
|
||||||
|
We can define syntaxes during development and use the provided tool to generate static Go code, which is then
|
||||||
|
built into the application. Alternatively, the library supports loading syntaxes dynamically at runtime.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
From source:
|
||||||
|
|
||||||
|
```
|
||||||
|
git clone https://code.squareroundforest.org/arpio/treerack
|
||||||
|
cd treerack
|
||||||
|
make install
|
||||||
|
```
|
||||||
|
|
||||||
|
Alternatively:
|
||||||
|
|
||||||
|
```
|
||||||
|
go install code.squareroundforest.org/arpio/treerack/cmd/treerack
|
||||||
|
```
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
- [Manual](docs/manual.md): A guide to the main use cases supported by Treerack.
|
||||||
|
- [Syntax Definition](docs/syntax.md): Detailed reference for the Treerack definition language.
|
||||||
|
- [Library Documentation](https://godocs.io/code.squareroundforest.org/arpio/treerack): GoDoc reference for the
|
||||||
|
runtime library.
|
||||||
|
|
||||||
|
## Developer Notes
|
||||||
|
|
||||||
|
We use a Makefile to manage the build and verification lifecycle.
|
||||||
|
|
||||||
|
Important: Generating the parser for the Treerack syntax itself (bootstrapping) requires multiple phases.
|
||||||
|
Consequently, running standard go build or go test commands may miss subtle consistency problems.
|
||||||
|
|
||||||
|
The authoritative way to verify changes is via the makefile:
|
||||||
|
|
||||||
|
```
|
||||||
|
make check
|
||||||
|
```
|
||||||
|
|
||||||
|
## Limitations
|
||||||
|
|
||||||
|
- Lexer & UTF-8: Treerack does not require a lexer, which simplifies the architecture. However, this enforces
|
||||||
|
the use of UTF-8 input. We have considered support for custom tokenizers as a potential future improvement.
|
||||||
|
- Whitespace Delimited Languages: Due to the recursive descent nature and the lack of a dedicated lexer state,
|
||||||
|
defining whitespace-delimited syntaxes (such as Python-style indentation) can be difficult to achieve with the
|
||||||
|
current feature set.
|
||||||
|
|||||||
@ -3,7 +3,7 @@ package treerack
|
|||||||
import "testing"
|
import "testing"
|
||||||
|
|
||||||
func TestScheme(t *testing.T) {
|
func TestScheme(t *testing.T) {
|
||||||
runTestsFile(t, "examples/scheme.treerack", []testItem{{
|
runTestsFile(t, "docs/examples/scheme.treerack", []testItem{{
|
||||||
title: "empty",
|
title: "empty",
|
||||||
}, {
|
}, {
|
||||||
title: "a function",
|
title: "a function",
|
||||||
|
|||||||
@ -140,5 +140,10 @@ func main() {
|
|||||||
varName = "headCodeExported"
|
varName = "headCodeExported"
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf("package %s\n\n// generated with scripts/createhead.go\nconst %s=%s", packageName, varName, quotedCode)
|
fmt.Printf(
|
||||||
|
"package %s\n\n// generated with scripts/createhead.go\nconst %s=%s",
|
||||||
|
packageName,
|
||||||
|
varName,
|
||||||
|
quotedCode,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -3,7 +3,7 @@ package treerack
|
|||||||
import "testing"
|
import "testing"
|
||||||
|
|
||||||
func TestSExpr(t *testing.T) {
|
func TestSExpr(t *testing.T) {
|
||||||
runTestsFile(t, "examples/sexpr.treerack", []testItem{{
|
runTestsFile(t, "docs/examples/sexpr.treerack", []testItem{{
|
||||||
title: "number",
|
title: "number",
|
||||||
text: "42",
|
text: "42",
|
||||||
nodes: []*Node{{
|
nodes: []*Node{{
|
||||||
|
|||||||
84
syntax.go
84
syntax.go
@ -1,3 +1,18 @@
|
|||||||
|
// Package treerack provides a parser generator for defining and interacting with arbitrary syntaxes.
|
||||||
|
//
|
||||||
|
// Treerack allows developers to define grammars — programmatically or via a syntax definition language
|
||||||
|
// derivative of EBNF — and generate recursive descent parsers. These parsers process input content and produce
|
||||||
|
// an Abstract Syntax Tree (AST) representation.
|
||||||
|
//
|
||||||
|
// The library supports two primary workflows:
|
||||||
|
//
|
||||||
|
// 1. Dynamic (Runtime): Loading or defining syntaxes programmatically at runtime to parse input immediately.
|
||||||
|
//
|
||||||
|
// 2. Static (Generation): Defining syntaxes during development and generating Go source code to be compiled
|
||||||
|
// into the application.
|
||||||
|
//
|
||||||
|
// For detailed syntax definition rules and the command-line tool usage, please refer to the repository
|
||||||
|
// documentation: https://code.squareroundforest.org/arpio/treerack
|
||||||
package treerack
|
package treerack
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@ -7,14 +22,37 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
)
|
)
|
||||||
|
|
||||||
// if min=0&&max=0, it means min=1,max=1
|
// SequenceItem represents a single element within a sequence definition, referencing another parser by name.
|
||||||
// else if max<=0, it means no max
|
//
|
||||||
// else if min<=0, it means no min
|
// Cardinality logic for SequenceItem:
|
||||||
|
//
|
||||||
|
// - If Min=0 and Max=0: Matches exactly once (equivalent to Min=1, Max=1).
|
||||||
|
//
|
||||||
|
// - If Max <= 0: Unbounded upper limit (matches Min or more times).
|
||||||
|
//
|
||||||
|
// - If Min <= 0: No lower limit (matches 0 to Max times).
|
||||||
type SequenceItem struct {
|
type SequenceItem struct {
|
||||||
Name string
|
|
||||||
Min, Max int
|
// Name is the identifier of the referenced parser definition.
|
||||||
|
Name string
|
||||||
|
|
||||||
|
// Min specifies the minimum required occurrences of the item.
|
||||||
|
Min int
|
||||||
|
|
||||||
|
// Max specifies the maximum accepted occurrences of the item.
|
||||||
|
Max int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Syntax represents a complete grammar definition consisting of multiple named parsers.
|
||||||
|
//
|
||||||
|
// The lifecycle of a Syntax instance consists of three phases:
|
||||||
|
//
|
||||||
|
// 1. Definition: Define parsers using methods like AnyChar, Sequence, and Choice, or load a definition via
|
||||||
|
// ReadSyntax.
|
||||||
|
//
|
||||||
|
// 2. Initialization: Call Init() to validate definitions, resolve references, and seal the syntax.
|
||||||
|
//
|
||||||
|
// 3. Execution: Use Parse() to process input or Generate() to create Go source code.
|
||||||
type Syntax struct {
|
type Syntax struct {
|
||||||
registry *registry
|
registry *registry
|
||||||
initialized bool
|
initialized bool
|
||||||
@ -24,9 +62,15 @@ type Syntax struct {
|
|||||||
root definition
|
root definition
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GeneratorOptions control the behavior of the Go code generator.
|
||||||
type GeneratorOptions struct {
|
type GeneratorOptions struct {
|
||||||
|
|
||||||
|
// PackageName sets the package name for the generated source file. Defaults to main.
|
||||||
PackageName string
|
PackageName string
|
||||||
Export bool
|
|
||||||
|
// Export determines whether the generated Parse function is exported (public) or unexported (private)
|
||||||
|
// within the package.
|
||||||
|
Export bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// applied in a non-type-checked way
|
// applied in a non-type-checked way
|
||||||
@ -51,11 +95,18 @@ type definition interface {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
ErrSyntaxInitialized = errors.New("syntax initialized")
|
|
||||||
ErrNoParsersDefined = errors.New("no parsers defined")
|
// ErrSyntaxInitialized is returned when attempting to modify a syntax that has already been initialized.
|
||||||
ErrInvalidEscapeCharacter = errors.New("invalid escape character")
|
ErrSyntaxInitialized = errors.New("syntax initialized")
|
||||||
ErrMultipleRoots = errors.New("multiple roots")
|
|
||||||
ErrInvalidSymbolName = errors.New("invalid symbol name")
|
// ErrNoParsersDefined is returned when attempting to initialize a syntax containing no parser definitions.
|
||||||
|
ErrNoParsersDefined = errors.New("no parsers defined")
|
||||||
|
|
||||||
|
// ErrMultipleRoots is returned when a syntax definition contains multiple explicit root parsers.
|
||||||
|
ErrMultipleRoots = errors.New("multiple roots")
|
||||||
|
|
||||||
|
// ErrInvalidSymbolName is returned when a named parser is assigned an invalid identifier.
|
||||||
|
ErrInvalidSymbolName = errors.New("invalid symbol name")
|
||||||
)
|
)
|
||||||
|
|
||||||
func (ct CommitType) String() string {
|
func (ct CommitType) String() string {
|
||||||
@ -193,6 +244,7 @@ func (s *Syntax) anyChar(name string, ct CommitType) error {
|
|||||||
return s.class(name, ct, true, nil, nil)
|
return s.class(name, ct, true, nil, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AnyChar registers a parser that accepts any single character (a wildcard).
|
||||||
func (s *Syntax) AnyChar(name string, ct CommitType) error {
|
func (s *Syntax) AnyChar(name string, ct CommitType) error {
|
||||||
if !isValidSymbol(name) {
|
if !isValidSymbol(name) {
|
||||||
return ErrInvalidSymbolName
|
return ErrInvalidSymbolName
|
||||||
@ -223,6 +275,8 @@ func (s *Syntax) class(name string, ct CommitType, not bool, chars []rune, range
|
|||||||
return s.sequence(name, ct, SequenceItem{Name: cname})
|
return s.sequence(name, ct, SequenceItem{Name: cname})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Class registers a character class parser, accepting characters defined in the specified list or ranges. If
|
||||||
|
// 'not' is true, it matches any character *except* those defined.
|
||||||
func (s *Syntax) Class(name string, ct CommitType, not bool, chars []rune, ranges [][]rune) error {
|
func (s *Syntax) Class(name string, ct CommitType, not bool, chars []rune, ranges [][]rune) error {
|
||||||
if !isValidSymbol(name) {
|
if !isValidSymbol(name) {
|
||||||
return ErrInvalidSymbolName
|
return ErrInvalidSymbolName
|
||||||
@ -244,6 +298,7 @@ func (s *Syntax) charSequence(name string, ct CommitType, chars []rune) error {
|
|||||||
return s.sequence(name, ct|NoWhitespace, namesToSequenceItems(refs)...)
|
return s.sequence(name, ct|NoWhitespace, namesToSequenceItems(refs)...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CharSequence registers a parser that matches a specific string literal (e.g., "foo").
|
||||||
func (s *Syntax) CharSequence(name string, ct CommitType, chars []rune) error {
|
func (s *Syntax) CharSequence(name string, ct CommitType, chars []rune) error {
|
||||||
if !isValidSymbol(name) {
|
if !isValidSymbol(name) {
|
||||||
return ErrInvalidSymbolName
|
return ErrInvalidSymbolName
|
||||||
@ -256,6 +311,7 @@ func (s *Syntax) sequence(name string, ct CommitType, items ...SequenceItem) err
|
|||||||
return s.register(newSequence(name, ct, items))
|
return s.register(newSequence(name, ct, items))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sequence registers a parser that matches a specific order of other named parsers (defined as SequenceItems).
|
||||||
func (s *Syntax) Sequence(name string, ct CommitType, items ...SequenceItem) error {
|
func (s *Syntax) Sequence(name string, ct CommitType, items ...SequenceItem) error {
|
||||||
if !isValidSymbol(name) {
|
if !isValidSymbol(name) {
|
||||||
return ErrInvalidSymbolName
|
return ErrInvalidSymbolName
|
||||||
@ -268,6 +324,7 @@ func (s *Syntax) choice(name string, ct CommitType, options ...string) error {
|
|||||||
return s.register(newChoice(name, ct, options))
|
return s.register(newChoice(name, ct, options))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Choice registers a parser that matches exactly one of the provided named options.
|
||||||
func (s *Syntax) Choice(name string, ct CommitType, options ...string) error {
|
func (s *Syntax) Choice(name string, ct CommitType, options ...string) error {
|
||||||
if !isValidSymbol(name) {
|
if !isValidSymbol(name) {
|
||||||
return ErrInvalidSymbolName
|
return ErrInvalidSymbolName
|
||||||
@ -276,6 +333,7 @@ func (s *Syntax) Choice(name string, ct CommitType, options ...string) error {
|
|||||||
return s.choice(name, ct|userDefined, options...)
|
return s.choice(name, ct|userDefined, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReadSyntax loads a grammar definition from a reader using the Treerack syntax format.
|
||||||
func (s *Syntax) ReadSyntax(r io.Reader) error {
|
func (s *Syntax) ReadSyntax(r io.Reader) error {
|
||||||
if s.initialized {
|
if s.initialized {
|
||||||
return ErrSyntaxInitialized
|
return ErrSyntaxInitialized
|
||||||
@ -302,6 +360,8 @@ func (s *Syntax) ReadSyntax(r io.Reader) error {
|
|||||||
return define(s, n)
|
return define(s, n)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Init validates, initializes, and seals the syntax. Parse and Generate call Init themselves, so an explicit
|
||||||
|
// call beforehand is optional.
|
||||||
func (s *Syntax) Init() error {
|
func (s *Syntax) Init() error {
|
||||||
if s.errInitFailed != nil {
|
if s.errInitFailed != nil {
|
||||||
return s.errInitFailed
|
return s.errInitFailed
|
||||||
@ -359,6 +419,7 @@ func (s *Syntax) keywordParsers() []parser {
|
|||||||
return p
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generate writes Go source code implementing the parser to the provided writer.
|
||||||
func (s *Syntax) Generate(o GeneratorOptions, w io.Writer) error {
|
func (s *Syntax) Generate(o GeneratorOptions, w io.Writer) error {
|
||||||
if err := s.Init(); err != nil {
|
if err := s.Init(); err != nil {
|
||||||
return err
|
return err
|
||||||
@ -454,6 +515,7 @@ func (s *Syntax) Generate(o GeneratorOptions, w io.Writer) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse reads from the input stream and constructs an AST based on the defined syntax.
|
||||||
func (s *Syntax) Parse(r io.Reader) (*Node, error) {
|
func (s *Syntax) Parse(r io.Reader) (*Node, error) {
|
||||||
if err := s.Init(); err != nil {
|
if err := s.Init(); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|||||||
@ -7,16 +7,39 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// CommitType controls how the output of a named parser is handled and represented in the resulting AST.
|
||||||
type CommitType int
|
type CommitType int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
None CommitType = 0
|
|
||||||
|
// None indicates the default behavior: parsed segments are represented as named nodes in the AST.
|
||||||
|
// Whitespace handling inherits the syntax's global settings.
|
||||||
|
None CommitType = 0
|
||||||
|
|
||||||
|
// Alias treats the parser as a pass-through. Validated segments are included in the AST node of the
|
||||||
|
// enclosing parser rather than creating a distinct node.
|
||||||
Alias CommitType = 1 << iota
|
Alias CommitType = 1 << iota
|
||||||
|
|
||||||
|
// Whitespace designates a parser (typically a character sequence) to be treated as ignored whitespace
|
||||||
|
// throughout the input.
|
||||||
Whitespace
|
Whitespace
|
||||||
|
|
||||||
|
// NoWhitespace prevents the automatic skipping of defined whitespace characters within specific sequences.
|
||||||
NoWhitespace
|
NoWhitespace
|
||||||
|
|
||||||
|
// Keyword marks a sequence as a reserved keyword. This allows specific sequences to be protected or
|
||||||
|
// restricted in certain contexts via the NoKeyword flag.
|
||||||
Keyword
|
Keyword
|
||||||
|
|
||||||
|
// NoKeyword prevents the parser from matching sequences marked as Keywords.
|
||||||
NoKeyword
|
NoKeyword
|
||||||
|
|
||||||
|
// FailPass configures the parser to propagate failure up to the enclosing parser rather than handling it
|
||||||
|
// locally.
|
||||||
FailPass
|
FailPass
|
||||||
|
|
||||||
|
// Root explicitly marks the parser as the root of the syntax. By default, the last defined parser is
|
||||||
|
// considered the root.
|
||||||
Root
|
Root
|
||||||
|
|
||||||
userDefined
|
userDefined
|
||||||
@ -30,30 +53,24 @@ const (
|
|||||||
formatIncludeComments
|
formatIncludeComments
|
||||||
)
|
)
|
||||||
|
|
||||||
// ParseError is returned when the input text doesn't match
|
// ParseError reports a failure to match the input text against the defined syntax.
|
||||||
// the used syntax during parsing.
|
|
||||||
type ParseError struct {
|
type ParseError struct {
|
||||||
|
|
||||||
// Input is the name of the input file or <input> if not
|
// Input denotes the name of the input source (e.g., filename), or "<input>" if unavailable.
|
||||||
// available.
|
|
||||||
Input string
|
Input string
|
||||||
|
|
||||||
// Offset is the index of the right-most failing
|
// Offset is the index of the right-most token where the parse failed.
|
||||||
// token in the input text.
|
|
||||||
Offset int
|
Offset int
|
||||||
|
|
||||||
// Line tells the line index of the right-most failing
|
// Line is the zero-based line number of the failure position.
|
||||||
// token in the input text.
|
|
||||||
//
|
//
|
||||||
// It is zero-based, and for error reporting, it is
|
// For display purposes, increment by one.
|
||||||
// recommended to increment it by one.
|
|
||||||
Line int
|
Line int
|
||||||
|
|
||||||
// Column tells the column index of the right-most failing
|
// Column is the zero-based column index of the failure position.
|
||||||
// token in the input text.
|
|
||||||
Column int
|
Column int
|
||||||
|
|
||||||
// Definition tells the right-most unmatched parser definition.
|
// Definition identifies the name of the specific parser definition where the match failed.
|
||||||
Definition string
|
Definition string
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -70,8 +87,10 @@ type builder interface {
|
|||||||
build(*context) ([]*Node, bool)
|
build(*context) ([]*Node, bool)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ErrInvalidUnicodeCharacter indicates that the input content contains invalid UTF-8 sequences.
|
||||||
var ErrInvalidUnicodeCharacter = errors.New("invalid unicode character")
|
var ErrInvalidUnicodeCharacter = errors.New("invalid unicode character")
|
||||||
|
|
||||||
|
// Error returns the formatted failure message.
|
||||||
func (pe *ParseError) Error() string {
|
func (pe *ParseError) Error() string {
|
||||||
return fmt.Sprintf(
|
return fmt.Sprintf(
|
||||||
"%s:%d:%d:parse failed, parsing: %s",
|
"%s:%d:%d:parse failed, parsing: %s",
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user