From 4c6c817431258245a490a8f6bd13232086756289 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Sun, 18 Jan 2026 22:52:27 +0100 Subject: [PATCH] documentation --- Makefile | 4 +- buzz.txt | 9 - cmd/treerack/docreflect.gen.go | 85 +- cmd/treerack/show.go | 4 + cmd/treerack/show_test.go | 14 +- docs/examples/acalc/acalc.treerack | 47 + docs/examples/acalc/go.mod | 3 + docs/examples/acalc/main.go | 143 +++ docs/examples/acalc/parser.go | 824 ++++++++++++++++++ {examples => docs/examples}/json.treerack | 0 {examples => docs/examples}/keyval.treerack | 0 {examples => docs/examples}/mml-exp.treerack | 0 {examples => docs/examples}/mml-exp2.treerack | 0 {examples => docs/examples}/mml-exp3.treerack | 0 {examples => docs/examples}/mml.treerack | 0 {examples => docs/examples}/scheme.treerack | 0 {examples => docs/examples}/sexpr.treerack | 0 {examples => docs/examples}/test.mml | 0 docs/manual.md | 629 +++++++++++++ docs/syntax.md | 121 +++ escape.go | 30 - escape_test.go | 27 +- head.go | 2 +- headexported.go | 2 +- internal/self/self.go | 9 +- json_test.go | 4 +- keyval_test.go | 2 +- keyword_test.go | 47 +- mml_test.go | 2 +- mmlexp2_test.go | 2 +- mmlexp3_test.go | 2 +- mmlexp_test.go | 6 +- nodehead.go | 26 +- readme.md | 66 +- scheme_test.go | 2 +- scripts/createhead.go | 7 +- sexpr_test.go | 2 +- syntax.go | 84 +- syntaxhead.go | 47 +- 39 files changed, 2077 insertions(+), 175 deletions(-) delete mode 100644 buzz.txt create mode 100644 docs/examples/acalc/acalc.treerack create mode 100644 docs/examples/acalc/go.mod create mode 100644 docs/examples/acalc/main.go create mode 100644 docs/examples/acalc/parser.go rename {examples => docs/examples}/json.treerack (100%) rename {examples => docs/examples}/keyval.treerack (100%) rename {examples => docs/examples}/mml-exp.treerack (100%) rename {examples => docs/examples}/mml-exp2.treerack (100%) rename {examples => docs/examples}/mml-exp3.treerack (100%) rename {examples => docs/examples}/mml.treerack (100%) rename {examples 
=> docs/examples}/scheme.treerack (100%) rename {examples => docs/examples}/sexpr.treerack (100%) rename {examples => docs/examples}/test.mml (100%) create mode 100644 docs/manual.md create mode 100644 docs/syntax.md diff --git a/Makefile b/Makefile index 7597ad1..a15274b 100644 --- a/Makefile +++ b/Makefile @@ -52,10 +52,12 @@ headexported.go: .build/headexported.go cp .build/headexported.go . .build/self.go: $(sources) $(parsers) head.go headexported.go .build - # since generator code depends on the syntax itself, we need to passes: + # since the generator code depends on the syntax itself, and such influences its own output, we need two + # passes: go build -o .build/treerack.current ./cmd/treerack .build/treerack.current generate --export --package-name self < syntax.treerack > .build/self.go go fmt .build/self.go + # we backup the original and apply the new: cp internal/self/self.go .build/self.go.backup cp .build/self.go internal/self # second pass: diff --git a/buzz.txt b/buzz.txt deleted file mode 100644 index 4e20506..0000000 --- a/buzz.txt +++ /dev/null @@ -1,9 +0,0 @@ -generator, in-process init or command line -syntax from text or defined during runtime, or combined -simple syntax with recursion -no lexer required -utf8, 8bit or custom tokens -abstract syntax tree from text of arbitrary syntax -reading from streams -context free, however support for custom tokens in the input -custom tokens for indentation built in diff --git a/cmd/treerack/docreflect.gen.go b/cmd/treerack/docreflect.gen.go index d2f5c3d..3ef65ba 100644 --- a/cmd/treerack/docreflect.gen.go +++ b/cmd/treerack/docreflect.gen.go @@ -2,49 +2,48 @@ Generated with https://code.squareroundforest.org/arpio/docreflect */ + package main - import "code.squareroundforest.org/arpio/docreflect" - func init() { - docreflect.Register("main", "") - docreflect.Register("main.check", "check parses input content against the provided syntax definition and fails if the input does not match.\nSyntax can be 
provided via a filename option or an inline string option. Input can be provided via a filename\noption, a positional argument filename, an inline string option, or piped from standard input.\n\nfunc(o, stdin, args)") - docreflect.Register("main.checkOptions", "") - docreflect.Register("main.checkOptions.Input", "Input specifies the filename of the input content to be validated.\n") - docreflect.Register("main.checkOptions.InputString", "InputString specifies the input content as an inline string.\n") - docreflect.Register("main.checkOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n") - docreflect.Register("main.checkOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n") - docreflect.Register("main.checkSyntax", "checkSyntax validates a syntax definition. The syntax may be provided via a file path (using an option or a\npositional argument), an inline string, or piped from standard input.\n\nfunc(o, stdin, args)") - docreflect.Register("main.checkSyntaxOptions", "") - docreflect.Register("main.checkSyntaxOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n") - docreflect.Register("main.checkSyntaxOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n") - docreflect.Register("main.errInvalidFilename", "") - docreflect.Register("main.errMultipleInputs", "") - docreflect.Register("main.errNoInput", "") - docreflect.Register("main.generate", "generate generates Go code that can parse arbitrary input with the provided syntax, and can be used embedded\nin an application.\n\nThe syntax may be provided via a file path (using an option or a positional argument), an\ninline string, or piped from standard input.\n\nfunc(o, stdin, stdout, args)") - docreflect.Register("main.generateOptions", "") - docreflect.Register("main.generateOptions.Export", "Export determines whether the generated parse function is exported (visible outside its package).\n") - 
docreflect.Register("main.generateOptions.PackageName", "PackageName specifies the package name for the generated code. Defaults to main.\n") - docreflect.Register("main.generateOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n") - docreflect.Register("main.generateOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n") - docreflect.Register("main.init", "\nfunc()") - docreflect.Register("main.initInput", "\nfunc(filename, stringValue, stdin, args)") - docreflect.Register("main.main", "\nfunc()") - docreflect.Register("main.mapNode", "\nfunc(n)") - docreflect.Register("main.node", "") - docreflect.Register("main.node.From", "") - docreflect.Register("main.node.Name", "") - docreflect.Register("main.node.Nodes", "") - docreflect.Register("main.node.Text", "") - docreflect.Register("main.node.To", "") - docreflect.Register("main.noop", "\nfunc()") - docreflect.Register("main.show", "show input content against a provided syntax definition and outputs the resulting AST (Abstract Syntax Tree)\nin JSON format. Syntax can be provided via a filename option or an inline string option. 
Input can be\nprovided via a filename option, a positional argument filename, an inline string option, or piped from\nstandard input.\n\nfunc(o, stdin, stdout, args)") - docreflect.Register("main.showOptions", "") - docreflect.Register("main.showOptions.Indent", "Indent specifies a custom indentation string for the output.\n") - docreflect.Register("main.showOptions.Input", "Input specifies the filename of the input content to be validated.\n") - docreflect.Register("main.showOptions.InputString", "InputString specifies the input content as an inline string.\n") - docreflect.Register("main.showOptions.Pretty", "Pretty enables indented, human-readable output.\n") - docreflect.Register("main.showOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n") - docreflect.Register("main.showOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n") - docreflect.Register("main.version", "") -} +docreflect.Register("main", "") +docreflect.Register("main.check", "check parses input content against the provided syntax definition and fails if the input does not match.\nSyntax can be provided via a filename option or an inline string option. Input can be provided via a filename\noption, a positional argument filename, an inline string option, or piped from standard input.\n\nfunc(o, stdin, args)") +docreflect.Register("main.checkOptions", "") +docreflect.Register("main.checkOptions.Input", "Input specifies the filename of the input content to be validated.\n") +docreflect.Register("main.checkOptions.InputString", "InputString specifies the input content as an inline string.\n") +docreflect.Register("main.checkOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n") +docreflect.Register("main.checkOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n") +docreflect.Register("main.checkSyntax", "checkSyntax validates a syntax definition. 
The syntax may be provided via a file path (using an option or a\npositional argument), an inline string, or piped from standard input.\n\nfunc(o, stdin, args)") +docreflect.Register("main.checkSyntaxOptions", "") +docreflect.Register("main.checkSyntaxOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n") +docreflect.Register("main.checkSyntaxOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n") +docreflect.Register("main.errInvalidFilename", "") +docreflect.Register("main.errMultipleInputs", "") +docreflect.Register("main.errNoInput", "") +docreflect.Register("main.generate", "generate generates Go code that can parse arbitrary input with the provided syntax, and can be used embedded\nin an application.\n\nThe syntax may be provided via a file path (using an option or a positional argument), an\ninline string, or piped from standard input.\n\nfunc(o, stdin, stdout, args)") +docreflect.Register("main.generateOptions", "") +docreflect.Register("main.generateOptions.Export", "Export determines whether the generated parse function is exported (visible outside its package).\n") +docreflect.Register("main.generateOptions.PackageName", "PackageName specifies the package name for the generated code. 
Defaults to main.\n") +docreflect.Register("main.generateOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n") +docreflect.Register("main.generateOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n") +docreflect.Register("main.init", "\nfunc()") +docreflect.Register("main.initInput", "\nfunc(filename, stringValue, stdin, args)") +docreflect.Register("main.main", "\nfunc()") +docreflect.Register("main.mapNode", "\nfunc(n)") +docreflect.Register("main.node", "") +docreflect.Register("main.node.From", "") +docreflect.Register("main.node.Name", "") +docreflect.Register("main.node.Nodes", "") +docreflect.Register("main.node.Text", "") +docreflect.Register("main.node.To", "") +docreflect.Register("main.noop", "\nfunc()") +docreflect.Register("main.show", "show input content against a provided syntax definition and outputs the resulting AST (Abstract Syntax Tree)\nin JSON format. Syntax can be provided via a filename option or an inline string option. 
Input can be\nprovided via a filename option, a positional argument filename, an inline string option, or piped from\nstandard input.\n\nfunc(o, stdin, stdout, args)") +docreflect.Register("main.showOptions", "") +docreflect.Register("main.showOptions.Indent", "Indent specifies a custom indentation string for the output.\n") +docreflect.Register("main.showOptions.Input", "Input specifies the filename of the input content to be validated.\n") +docreflect.Register("main.showOptions.InputString", "InputString specifies the input content as an inline string.\n") +docreflect.Register("main.showOptions.Pretty", "Pretty enables indented, human-readable output.\n") +docreflect.Register("main.showOptions.Syntax", "Syntax specifies the filename of the syntax definition file.\n") +docreflect.Register("main.showOptions.SyntaxString", "SyntaxString specifies the syntax as an inline string.\n") +docreflect.Register("main.version", "") +} \ No newline at end of file diff --git a/cmd/treerack/show.go b/cmd/treerack/show.go index c07f9bc..fef5c0c 100644 --- a/cmd/treerack/show.go +++ b/cmd/treerack/show.go @@ -105,5 +105,9 @@ func show(o showOptions, stdin io.Reader, stdout io.Writer, args ...string) erro return err } + if _, err := stdout.Write([]byte{'\n'}); err != nil { + return err + } + return nil } diff --git a/cmd/treerack/show_test.go b/cmd/treerack/show_test.go index 1fb989a..818335f 100644 --- a/cmd/treerack/show_test.go +++ b/cmd/treerack/show_test.go @@ -128,7 +128,7 @@ func TestShow(t *testing.T) { t.Fatal(nil) } - if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` { + if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" { t.Fatal(out.String()) } }) @@ -144,7 +144,7 @@ func TestShow(t *testing.T) { t.Fatal(nil) } - if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` { + if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" { t.Fatal(out.String()) } }) @@ -159,7 +159,7 @@ func TestShow(t 
*testing.T) { t.Fatal(nil) } - if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` { + if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" { t.Fatal(out.String()) } }) @@ -172,7 +172,7 @@ func TestShow(t *testing.T) { t.Fatal(nil) } - if out.String() != `{"name":"foo","from":0,"to":3,"text":"bar"}` { + if out.String() != "{\"name\":\"foo\",\"from\":0,\"to\":3,\"text\":\"bar\"}\n" { t.Fatal(out.String()) } }) @@ -189,7 +189,7 @@ func TestShow(t *testing.T) { t.Fatal(nil) } - const expect = "{\n \"name\": \"foo\",\n \"from\": 0,\n \"to\": 3,\n \"text\": \"bar\"\n}" + const expect = "{\n \"name\": \"foo\",\n \"from\": 0,\n \"to\": 3,\n \"text\": \"bar\"\n}\n" if out.String() != expect { t.Fatal(out.String()) } @@ -207,7 +207,7 @@ func TestShow(t *testing.T) { t.Fatal(nil) } - if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}" { + if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}\n" { t.Fatal(out.String()) } }) @@ -225,7 +225,7 @@ func TestShow(t *testing.T) { t.Fatal(nil) } - if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}" { + if out.String() != "{\nxx\"name\": \"foo\",\nxx\"from\": 0,\nxx\"to\": 3,\nxx\"text\": \"bar\"\n}\n" { t.Fatal(out.String()) } }) diff --git a/docs/examples/acalc/acalc.treerack b/docs/examples/acalc/acalc.treerack new file mode 100644 index 0000000..71caf7c --- /dev/null +++ b/docs/examples/acalc/acalc.treerack @@ -0,0 +1,47 @@ +// first define our whitespace chars: +ignore:ws = " " | [\t] | [\r] | [\n]; + +// define the format of input numbers. With the :nows flag we declare that we don't expect ignored spaces +// between the digits and the delimiters. We support integers, floating point numbers, and floating point +// numbers with their exponential notation. 
We don't support arbitrary leading zeros to avoid confusion with the +// octal representation of numbers, which is not supported here. +num:nows = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? [0-9]+)?; + +// define the supported operators: +add = "+"; +sub = "-"; +mul = "*"; +div = "/"; + +// let's define grouping. Any expression can be grouped. The definition of the expression can be found further +// down in the syntax document. This usage of the expression reference is also a good example for recursive +// definitions. Using the :alias flag prevents generating a separate node in the resulting AST. +group:alias = "(" expression ")"; + +// we group the operators by precedence. This is necessary to parse the expressions like a * b + c in a structure +// that is equivalent to (a * b) + c. +op0:alias = mul | div; +op1:alias = add | sub; + +// we also define which operands can be used at which precedence level. Notice, how operand1 also allows binary0 +// expressions. +operand0:alias = num | group; +operand1:alias = operand0 | binary0; + +// using the prioritized operators, we can define the prioritized binary expressions. We support a + b + c, and +// not only a + b. +binary0 = operand0 (op0 operand0)+; +binary1 = operand1 (op1 operand1)+; +binary:alias = binary0 | binary1; + +// let's define, what an expression can be. Notice the recursion along expression and group. +expression:alias = num | group | binary; + +// finally, define the root of the parser, the result of the arithmetic expression. It can be any expression, +// but since we used the :alias flag for the expression definition, we need to add a non-alias parser that will +// represent the root of the resulting AST. This also allows us to define an "exit" token, which can be used +// exit from the REPL loop of our application. +// +// Note that we don't need to use the :root flag here, because it is our last definition, and this means that +// the expression is the root parser of the syntax. 
+result = expression | "exit" diff --git a/docs/examples/acalc/go.mod b/docs/examples/acalc/go.mod new file mode 100644 index 0000000..b4c31fe --- /dev/null +++ b/docs/examples/acalc/go.mod @@ -0,0 +1,3 @@ +module acalac + +go 1.25.4 diff --git a/docs/examples/acalc/main.go b/docs/examples/acalc/main.go new file mode 100644 index 0000000..7adfce3 --- /dev/null +++ b/docs/examples/acalc/main.go @@ -0,0 +1,143 @@ +package main + +import ( + "bufio" + "bytes" + "encoding/json" + "errors" + "fmt" + "io" + "log" + "os" + "strings" +) + +var errExit = errors.New("exit") + +func repl(input io.Reader, output io.Writer) { + // use buffered io, to be able to read the input line-by-line: + buf := bufio.NewReader(os.Stdin) + + // our REPL loop: + for { + // print a basic prompt: + if _, err := output.Write([]byte("> ")); err != nil { + + // we cannot fix it if there is an error here: + log.Fatalln(err) + } + + // read the input and handle the errors: + expr, err := read(buf) + + // when EOF, that means the user pressed Ctrl+D. Let's terminate the output with a conventional newline + // and exit: + if errors.Is(err, io.EOF) { + output.Write([]byte{'\n'}) + os.Exit(0) + } + + // when errExit, that means the user entered exit: + if errors.Is(err, errExit) { + os.Exit(0) + } + + // if it's a parser error, we print and continue from reading again, to allow the user to fix the + // problem: + var perr *parseError + if errors.As(err, &perr) { + log.Println(err) + continue + } + + // in case of any other error, we don't know what's going on, so we get out of here right away: + if err != nil { + log.Fatalln(err) + } + + // if we received an expression, then we can evaluate it. 
We are not expecting errors here: + result := eval(expr) + + // we have the result, we need to print it: + if err := print(output, result); err != nil { + + // if printing fails, we don't know how to fix it, so we get out of here: + log.Fatalln(err) + } + } +} + +func read(input *bufio.Reader) (*node, error) { + line, err := input.ReadString('\n') + if err != nil { + return nil, err + } + + // expr will be of type *node, which type is defined in the generated code + expr, err := parse(bytes.NewBufferString(line)) + if err != nil { + return nil, err + } + + if strings.TrimSpace(expr.Text()) == "exit" { + return nil, errExit + } + + // we know based on the syntax, that the top level node will always have a single child, either a number + // literal or a binary operation: + return expr.Nodes[0], nil +} + +// eval always returns the calculated result as a float64: +func eval(expr *node) float64 { + + // we know that it's either a number or a binary operation: + var value float64 + switch expr.Name { + case "num": + + // the number format in our syntax is based on the JSON spec, so we can piggy-back on it for the number + // parsing. 
In a real application, we would need to handle the errors here anyway, even if our parser + // already validated the input: + json.Unmarshal([]byte(expr.Text()), &value) + return value + default: + + // we know that the first node is either a number of a child expression: + value, expr.Nodes = eval(expr.Nodes[0]), expr.Nodes[1:] + + // we don't need to track back, so we can drop the processed nodes while consuming them: + for len(expr.Nodes) > 0 { + var ( + operator string + operand float64 + ) + + operator, operand, expr.Nodes = expr.Nodes[0].Name, eval(expr.Nodes[1]), expr.Nodes[2:] + switch operator { + case "add": + value += operand + case "sub": + value -= operand + case "mul": + value *= operand + case "div": + // Go returns -Inf or +Inf on division by zero: + value /= operand + } + } + } + + return value +} + +func print(output io.Writer, result float64) error { + _, err := fmt.Fprintln(output, result) + return err +} + +func main() { + // for testability, we define the REPL loop in a separate function so that the test code can call it with + // in-memory buffers as input and output. Our main function calls it with the stdio handles: + repl(os.Stdin, os.Stdout) +} diff --git a/docs/examples/acalc/parser.go b/docs/examples/acalc/parser.go new file mode 100644 index 0000000..299803e --- /dev/null +++ b/docs/examples/acalc/parser.go @@ -0,0 +1,824 @@ + +/* +This file was generated with treerack (https://code.squareroundforest.org/arpio/treerack). + +The contents of this file fall under different licenses. + +The code between the "// head" and "// eo head" lines falls under the same +license as the source code of treerack (https://code.squareroundforest.org/arpio/treerack), +unless explicitly stated otherwise, if treerack's license allows changing the +license of this source code. 
+ +Treerack's license: MIT https://opensource.org/licenses/MIT +where YEAR=2017, COPYRIGHT HOLDER=Arpad Ryszka (arpad.ryszka@gmail.com) + +The rest of the content of this file falls under the same license as the one +that the user of treerack generating this file declares for it, or it is +unlicensed. +*/ + + +package main + +// head +import ( + "strconv" + "errors" + "io" + "strings" + "unicode" + "fmt" + "bufio" +) + +type charParser struct { + name string + id int + not bool + chars []rune + ranges [][]rune +} +type charBuilder struct { + name string + id int +} + +func (p *charParser) nodeName() string { + return p.name +} +func (p *charParser) nodeID() int { + return p.id +} +func (p *charParser) commitType() commitType { + return alias +} +func matchChar(chars []rune, ranges [][]rune, not bool, char rune) bool { + for _, ci := range chars { + if ci == char { + return !not + } + } + for _, ri := range ranges { + if char >= ri[0] && char <= ri[1] { + return !not + } + } + return not +} +func (p *charParser) match(t rune) bool { + return matchChar(p.chars, p.ranges, p.not, t) +} +func (p *charParser) parse(c *context) { + if tok, ok := c.token(); !ok || !p.match(tok) { + if c.offset > c.failOffset { + c.failOffset = c.offset + c.failingParser = nil + } + c.fail(c.offset) + return + } + c.success(c.offset + 1) +} +func (b *charBuilder) nodeName() string { + return b.name +} +func (b *charBuilder) nodeID() int { + return b.id +} +func (b *charBuilder) build(c *context) ([]*node, bool) { + return nil, false +} + +type sequenceParser struct { + name string + id int + commit commitType + items []parser + ranges [][]int + generalizations []int + allChars bool +} +type sequenceBuilder struct { + name string + id int + commit commitType + items []builder + ranges [][]int + generalizations []int + allChars bool +} + +func (p *sequenceParser) nodeName() string { + return p.name +} +func (p *sequenceParser) nodeID() int { + return p.id +} +func (p *sequenceParser) 
commitType() commitType { + return p.commit +} +func (p *sequenceParser) parse(c *context) { + if !p.allChars { + if c.results.pending(c.offset, p.id) { + c.fail(c.offset) + return + } + c.results.markPending(c.offset, p.id) + } + var ( + currentCount int + parsed bool + ) + itemIndex := 0 + from := c.offset + to := c.offset + for itemIndex < len(p.items) { + p.items[itemIndex].parse(c) + if !c.matchLast { + if currentCount >= p.ranges[itemIndex][0] { + itemIndex++ + currentCount = 0 + continue + } + c.offset = from + if c.fromResults(p) { + if to > c.failOffset { + c.failOffset = -1 + c.failingParser = nil + } + if !p.allChars { + c.results.unmarkPending(from, p.id) + } + return + } + if c.failingParser == nil && p.commit&userDefined != 0 && p.commit&whitespace == 0 && p.commit&failPass == 0 { + c.failingParser = p + } + c.fail(from) + if !p.allChars { + c.results.unmarkPending(from, p.id) + } + return + } + parsed = c.offset > to + if parsed { + currentCount++ + } + to = c.offset + if !parsed || p.ranges[itemIndex][1] > 0 && currentCount == p.ranges[itemIndex][1] { + itemIndex++ + currentCount = 0 + } + } + if p.commit&noKeyword != 0 && c.isKeyword(from, to) { + if c.failingParser == nil && p.commit&userDefined != 0 && p.commit&whitespace == 0 && p.commit&failPass == 0 { + c.failingParser = p + } + c.fail(from) + if !p.allChars { + c.results.unmarkPending(from, p.id) + } + return + } + for _, g := range p.generalizations { + if c.results.pending(from, g) { + c.results.setMatch(from, g, to) + } + } + if to > c.failOffset { + c.failOffset = -1 + c.failingParser = nil + } + c.results.setMatch(from, p.id, to) + c.success(to) + if !p.allChars { + c.results.unmarkPending(from, p.id) + } +} +func (b *sequenceBuilder) nodeName() string { + return b.name +} +func (b *sequenceBuilder) nodeID() int { + return b.id +} +func (b *sequenceBuilder) build(c *context) ([]*node, bool) { + to, ok := c.results.longestMatch(c.offset, b.id) + if !ok { + return nil, false + } + from := 
c.offset + parsed := to > from + if b.allChars { + c.offset = to + if b.commit&alias != 0 { + return nil, true + } + return []*node{{Name: b.name, From: from, To: to, tokens: c.tokens}}, true + } else if parsed { + c.results.dropMatchTo(c.offset, b.id, to) + for _, g := range b.generalizations { + c.results.dropMatchTo(c.offset, g, to) + } + } else { + if c.results.pending(c.offset, b.id) { + return nil, false + } + c.results.markPending(c.offset, b.id) + for _, g := range b.generalizations { + c.results.markPending(c.offset, g) + } + } + var ( + itemIndex int + currentCount int + nodes []*node + ) + for itemIndex < len(b.items) { + itemFrom := c.offset + n, ok := b.items[itemIndex].build(c) + if !ok { + itemIndex++ + currentCount = 0 + continue + } + if c.offset > itemFrom { + nodes = append(nodes, n...) + currentCount++ + if b.ranges[itemIndex][1] > 0 && currentCount == b.ranges[itemIndex][1] { + itemIndex++ + currentCount = 0 + } + continue + } + if currentCount < b.ranges[itemIndex][0] { + for i := 0; i < b.ranges[itemIndex][0]-currentCount; i++ { + nodes = append(nodes, n...) 
+ } + } + itemIndex++ + currentCount = 0 + } + if !parsed { + c.results.unmarkPending(from, b.id) + for _, g := range b.generalizations { + c.results.unmarkPending(from, g) + } + } + if b.commit&alias != 0 { + return nodes, true + } + return []*node{{Name: b.name, From: from, To: to, Nodes: nodes, tokens: c.tokens}}, true +} + +type choiceParser struct { + name string + id int + commit commitType + options []parser + generalizations []int +} +type choiceBuilder struct { + name string + id int + commit commitType + options []builder + generalizations []int +} + +func (p *choiceParser) nodeName() string { + return p.name +} +func (p *choiceParser) nodeID() int { + return p.id +} +func (p *choiceParser) commitType() commitType { + return p.commit +} +func (p *choiceParser) parse(c *context) { + if c.fromResults(p) { + return + } + if c.results.pending(c.offset, p.id) { + c.fail(c.offset) + return + } + c.results.markPending(c.offset, p.id) + var ( + match bool + optionIndex int + foundMatch bool + failingParser parser + ) + from := c.offset + to := c.offset + initialFailOffset := c.failOffset + initialFailingParser := c.failingParser + failOffset := initialFailOffset + for { + foundMatch = false + optionIndex = 0 + for optionIndex < len(p.options) { + p.options[optionIndex].parse(c) + optionIndex++ + if !c.matchLast { + if c.failOffset > failOffset { + failOffset = c.failOffset + failingParser = c.failingParser + } + } + if !c.matchLast || match && c.offset <= to { + c.offset = from + continue + } + match = true + foundMatch = true + to = c.offset + c.offset = from + c.results.setMatch(from, p.id, to) + } + if !foundMatch { + break + } + } + if match { + if p.commit&noKeyword != 0 && c.isKeyword(from, to) { + if c.failingParser == nil && p.commit&userDefined != 0 && p.commit&whitespace == 0 && p.commit&failPass == 0 { + c.failingParser = p + } + c.fail(from) + c.results.unmarkPending(from, p.id) + return + } + if failOffset > to { + c.failOffset = failOffset + 
c.failingParser = failingParser + } else if to > initialFailOffset { + c.failOffset = -1 + c.failingParser = nil + } else { + c.failOffset = initialFailOffset + c.failingParser = initialFailingParser + } + c.success(to) + c.results.unmarkPending(from, p.id) + return + } + if failOffset > initialFailOffset { + c.failOffset = failOffset + c.failingParser = failingParser + if c.failingParser == nil && p.commitType()&userDefined != 0 && p.commitType()&whitespace == 0 && p.commitType()&failPass == 0 { + c.failingParser = p + } + } + c.results.setNoMatch(from, p.id) + c.fail(from) + c.results.unmarkPending(from, p.id) +} +func (b *choiceBuilder) nodeName() string { + return b.name +} +func (b *choiceBuilder) nodeID() int { + return b.id +} +func (b *choiceBuilder) build(c *context) ([]*node, bool) { + to, ok := c.results.longestMatch(c.offset, b.id) + if !ok { + return nil, false + } + from := c.offset + parsed := to > from + if parsed { + c.results.dropMatchTo(c.offset, b.id, to) + for _, g := range b.generalizations { + c.results.dropMatchTo(c.offset, g, to) + } + } else { + if c.results.pending(c.offset, b.id) { + return nil, false + } + c.results.markPending(c.offset, b.id) + for _, g := range b.generalizations { + c.results.markPending(c.offset, g) + } + } + var option builder + for _, o := range b.options { + if c.results.hasMatchTo(c.offset, o.nodeID(), to) { + option = o + break + } + } + n, _ := option.build(c) + if !parsed { + c.results.unmarkPending(from, b.id) + for _, g := range b.generalizations { + c.results.unmarkPending(from, g) + } + } + if b.commit&alias != 0 { + return n, true + } + return []*node{{Name: b.name, From: from, To: to, Nodes: n, tokens: c.tokens}}, true +} + +type idSet struct{ ids []uint } + +func divModBits(id int) (int, int) { + return id / strconv.IntSize, id % strconv.IntSize +} +func (s *idSet) set(id int) { + d, m := divModBits(id) + if d >= len(s.ids) { + if d < cap(s.ids) { + s.ids = s.ids[:d+1] + } else { + s.ids = 
s.ids[:cap(s.ids)] + for i := cap(s.ids); i <= d; i++ { + s.ids = append(s.ids, 0) + } + } + } + s.ids[d] |= 1 << uint(m) +} +func (s *idSet) unset(id int) { + d, m := divModBits(id) + if d >= len(s.ids) { + return + } + s.ids[d] &^= 1 << uint(m) +} +func (s *idSet) has(id int) bool { + d, m := divModBits(id) + if d >= len(s.ids) { + return false + } + return s.ids[d]&(1< offset { + return ints + } + if cap(ints) > offset { + ints = ints[:offset+1] + return ints + } + ints = ints[:cap(ints)] + for i := len(ints); i <= offset; i++ { + ints = append(ints, nil) + } + return ints +} +func ensureOffsetIDs(ids []*idSet, offset int) []*idSet { + if len(ids) > offset { + return ids + } + if cap(ids) > offset { + ids = ids[:offset+1] + return ids + } + ids = ids[:cap(ids)] + for i := len(ids); i <= offset; i++ { + ids = append(ids, nil) + } + return ids +} +func (r *results) setMatch(offset, id, to int) { + r.match = ensureOffsetInts(r.match, offset) + for i := 0; i < len(r.match[offset]); i += 2 { + if r.match[offset][i] != id || r.match[offset][i+1] != to { + continue + } + return + } + r.match[offset] = append(r.match[offset], id, to) +} +func (r *results) setNoMatch(offset, id int) { + if len(r.match) > offset { + for i := 0; i < len(r.match[offset]); i += 2 { + if r.match[offset][i] != id { + continue + } + return + } + } + r.noMatch = ensureOffsetIDs(r.noMatch, offset) + if r.noMatch[offset] == nil { + r.noMatch[offset] = &idSet{} + } + r.noMatch[offset].set(id) +} +func (r *results) hasMatchTo(offset, id, to int) bool { + if len(r.match) <= offset { + return false + } + for i := 0; i < len(r.match[offset]); i += 2 { + if r.match[offset][i] != id { + continue + } + if r.match[offset][i+1] == to { + return true + } + } + return false +} +func (r *results) longestMatch(offset, id int) (int, bool) { + if len(r.match) <= offset { + return 0, false + } + var found bool + to := -1 + for i := 0; i < len(r.match[offset]); i += 2 { + if r.match[offset][i] != id { + continue + 
} + if r.match[offset][i+1] > to { + to = r.match[offset][i+1] + } + found = true + } + return to, found +} +func (r *results) longestResult(offset, id int) (int, bool, bool) { + if len(r.noMatch) > offset && r.noMatch[offset] != nil && r.noMatch[offset].has(id) { + return 0, false, true + } + to, ok := r.longestMatch(offset, id) + return to, ok, ok +} +func (r *results) dropMatchTo(offset, id, to int) { + for i := 0; i < len(r.match[offset]); i += 2 { + if r.match[offset][i] != id { + continue + } + if r.match[offset][i+1] == to { + r.match[offset][i] = -1 + return + } + } +} +func (r *results) resetPending() { + r.isPending = nil +} +func (r *results) pending(offset, id int) bool { + if len(r.isPending) <= id { + return false + } + for i := range r.isPending[id] { + if r.isPending[id][i] == offset { + return true + } + } + return false +} +func (r *results) markPending(offset, id int) { + r.isPending = ensureOffsetInts(r.isPending, id) + for i := range r.isPending[id] { + if r.isPending[id][i] == -1 { + r.isPending[id][i] = offset + return + } + } + r.isPending[id] = append(r.isPending[id], offset) +} +func (r *results) unmarkPending(offset, id int) { + for i := range r.isPending[id] { + if r.isPending[id][i] == offset { + r.isPending[id][i] = -1 + break + } + } +} + +type context struct { + reader io.RuneReader + keywords []parser + offset int + readOffset int + consumed int + offsetLimit int + failOffset int + failingParser parser + readErr error + eof bool + results *results + tokens []rune + matchLast bool +} + +func newContext(r io.RuneReader, keywords []parser) *context { + return &context{reader: r, keywords: keywords, results: &results{}, offsetLimit: -1, failOffset: -1} +} +func (c *context) read() bool { + if c.eof || c.readErr != nil { + return false + } + token, n, err := c.reader.ReadRune() + if err != nil { + if errors.Is(err, io.EOF) { + if n == 0 { + c.eof = true + return false + } + } else { + c.readErr = err + return false + } + } + 
c.readOffset++ + if token == unicode.ReplacementChar { + c.readErr = errInvalidUnicodeCharacter + return false + } + c.tokens = append(c.tokens, token) + return true +} +func (c *context) token() (rune, bool) { + if c.offset == c.offsetLimit { + return 0, false + } + if c.offset == c.readOffset { + if !c.read() { + return 0, false + } + } + return c.tokens[c.offset], true +} +func (c *context) fromResults(p parser) bool { + to, m, ok := c.results.longestResult(c.offset, p.nodeID()) + if !ok { + return false + } + if m { + c.success(to) + } else { + c.fail(c.offset) + } + return true +} +func (c *context) isKeyword(from, to int) bool { + ol := c.offsetLimit + c.offsetLimit = to + defer func() { + c.offsetLimit = ol + }() + for _, kw := range c.keywords { + c.offset = from + kw.parse(c) + if c.matchLast && c.offset == to { + return true + } + } + return false +} +func (c *context) success(to int) { + c.offset = to + c.matchLast = true + if to > c.consumed { + c.consumed = to + } +} +func (c *context) fail(offset int) { + c.offset = offset + c.matchLast = false +} +func findLine(tokens []rune, offset int) (line, column int) { + tokens = tokens[:offset] + for i := range tokens { + column++ + if tokens[i] == '\n' { + column = 0 + line++ + } + } + return +} +func (c *context) parseError(p parser) error { + definition := p.nodeName() + flagIndex := strings.Index(definition, ":") + if flagIndex > 0 { + definition = definition[:flagIndex] + } + if c.failingParser == nil { + c.failOffset = c.consumed + } + line, col := findLine(c.tokens, c.failOffset) + return &parseError{Offset: c.failOffset, Line: line, Column: col, Definition: definition} +} +func (c *context) finalizeParse(root parser) error { + fp := c.failingParser + if fp == nil { + fp = root + } + to, match, found := c.results.longestResult(0, root.nodeID()) + if !found || !match || found && match && to < c.readOffset { + return c.parseError(fp) + } + c.read() + if c.eof { + return nil + } + if c.readErr != nil { + 
return c.readErr + } + return c.parseError(root) +} + +type node struct { + Name string + Nodes []*node + From int + To int + tokens []rune +} + +func (n *node) Tokens() []rune { + return n.tokens +} +func (n *node) String() string { + return fmt.Sprintf("%s:%d:%d:%s", n.Name, n.From, n.To, n.Text()) +} +func (n *node) Text() string { + return string(n.Tokens()[n.From:n.To]) +} + +type commitType int + +const ( + none commitType = 0 + alias commitType = 1 << iota + whitespace + noWhitespace + keyword + noKeyword + failPass + root + userDefined +) + +type formatFlags int + +const ( + formatNone formatFlags = 0 + formatPretty formatFlags = 1 << iota + formatIncludeComments +) + +type parseError struct { + Input string + Offset int + Line int + Column int + Definition string +} +type parser interface { + nodeName() string + nodeID() int + commitType() commitType + parse(*context) +} +type builder interface { + nodeName() string + nodeID() int + build(*context) ([]*node, bool) +} + +var errInvalidUnicodeCharacter = errors.New("invalid unicode character") + +func (pe *parseError) Error() string { + return fmt.Sprintf("%s:%d:%d:parse failed, parsing: %s", pe.Input, pe.Line+1, pe.Column+1, pe.Definition) +} +func parseInput(r io.Reader, p parser, b builder, kw []parser) (*node, error) { + c := newContext(bufio.NewReader(r), kw) + p.parse(c) + if c.readErr != nil { + return nil, c.readErr + } + if err := c.finalizeParse(p); err != nil { + if perr, ok := err.(*parseError); ok { + perr.Input = "" + } + return nil, err + } + c.offset = 0 + c.results.resetPending() + n, _ := b.build(c) + return n[0], nil +} + +// eo head + +func parse(r io.Reader) (*node, error) { + +var p65 = sequenceParser{id: 65, commit: 128,ranges: [][]int{{0, -1},{1, 1},{0, -1},},};var p63 = choiceParser{id: 63, commit: 2,};var p62 = choiceParser{id: 62, commit: 262,name: "ignore",generalizations: []int{63,},};var p2 = sequenceParser{id: 2, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 
1},},generalizations: []int{62,63,},};var p1 = charParser{id: 1,chars: []rune{32,},};p2.items = []parser{&p1,};var p4 = sequenceParser{id: 4, commit: 2,allChars: true,ranges: [][]int{{1, 1},},generalizations: []int{62,63,},};var p3 = charParser{id: 3,chars: []rune{9,},};p4.items = []parser{&p3,};var p6 = sequenceParser{id: 6, commit: 2,allChars: true,ranges: [][]int{{1, 1},},generalizations: []int{62,63,},};var p5 = charParser{id: 5,chars: []rune{13,},};p6.items = []parser{&p5,};var p8 = sequenceParser{id: 8, commit: 2,allChars: true,ranges: [][]int{{1, 1},},generalizations: []int{62,63,},};var p7 = charParser{id: 7,chars: []rune{10,},};p8.items = []parser{&p7,};p62.options = []parser{&p2,&p4,&p6,&p8,};p63.options = []parser{&p62,};var p64 = choiceParser{id: 64, commit: 258,name: "result:wsroot",};var p56 = choiceParser{id: 56, commit: 258,name: "expression",generalizations: []int{64,},};var p31 = sequenceParser{id: 31, commit: 264,name: "num",ranges: [][]int{{0, 1},{1, 1},{0, 1},{0, 1},{0, 1},{1, 1},{0, 1},{0, 1},},generalizations: []int{56,47,48,64,},};var p10 = sequenceParser{id: 10, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var p9 = charParser{id: 9,chars: []rune{45,},};p10.items = []parser{&p9,};var p18 = choiceParser{id: 18, commit: 10,};var p12 = sequenceParser{id: 12, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},generalizations: []int{18,},};var p11 = charParser{id: 11,chars: []rune{48,},};p12.items = []parser{&p11,};var p17 = sequenceParser{id: 17, commit: 10,ranges: [][]int{{1, 1},{0, -1},{1, 1},{0, -1},},generalizations: []int{18,},};var p14 = sequenceParser{id: 14, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var p13 = charParser{id: 13,ranges: [][]rune{{49, 57},},};p14.items = []parser{&p13,};var p16 = sequenceParser{id: 16, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var p15 = charParser{id: 15,ranges: [][]rune{{48, 57},},};p16.items = []parser{&p15,};p17.items = 
[]parser{&p14,&p16,};p18.options = []parser{&p12,&p17,};var p23 = sequenceParser{id: 23, commit: 10,ranges: [][]int{{1, 1},{1, -1},{1, 1},{1, -1},},};var p20 = sequenceParser{id: 20, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var p19 = charParser{id: 19,chars: []rune{46,},};p20.items = []parser{&p19,};var p22 = sequenceParser{id: 22, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var p21 = charParser{id: 21,ranges: [][]rune{{48, 57},},};p22.items = []parser{&p21,};p23.items = []parser{&p20,&p22,};var p30 = sequenceParser{id: 30, commit: 10,ranges: [][]int{{1, 1},{0, 1},{1, -1},{1, 1},{0, 1},{1, -1},},};var p25 = sequenceParser{id: 25, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var p24 = charParser{id: 24,chars: []rune{101,69,},};p25.items = []parser{&p24,};var p27 = sequenceParser{id: 27, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var p26 = charParser{id: 26,chars: []rune{43,45,},};p27.items = []parser{&p26,};var p29 = sequenceParser{id: 29, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var p28 = charParser{id: 28,ranges: [][]rune{{48, 57},},};p29.items = []parser{&p28,};p30.items = []parser{&p25,&p27,&p29,};p31.items = []parser{&p10,&p18,&p23,&p30,};var p44 = sequenceParser{id: 44, commit: 258,name: "group",ranges: [][]int{{1, 1},{0, -1},{1, 1},{0, -1},{1, 1},},generalizations: []int{56,47,48,64,},};var p41 = sequenceParser{id: 41, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var p40 = charParser{id: 40,chars: []rune{40,},};p41.items = []parser{&p40,};var p43 = sequenceParser{id: 43, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var p42 = charParser{id: 42,chars: []rune{41,},};p43.items = []parser{&p42,};p44.items = []parser{&p41,&p63,&p56,&p63,&p43,};var p55 = choiceParser{id: 55, commit: 258,name: "binary",generalizations: []int{56,64,},};var p51 = sequenceParser{id: 51, commit: 256,name: "binary0",ranges: [][]int{{1, 1},{0, -1},{1, 1},{0, 
-1},},generalizations: []int{55,48,56,64,},};var p47 = choiceParser{id: 47, commit: 258,name: "operand0",generalizations: []int{48,},};p47.options = []parser{&p31,&p44,};var p49 = sequenceParser{id: 49, commit: 2,ranges: [][]int{{1, 1},{0, -1},{1, 1},},};var p45 = choiceParser{id: 45, commit: 258,name: "op0",};var p37 = sequenceParser{id: 37, commit: 264,name: "mul",allChars: true,ranges: [][]int{{1, 1},{1, 1},},generalizations: []int{45,},};var p36 = charParser{id: 36,chars: []rune{42,},};p37.items = []parser{&p36,};var p39 = sequenceParser{id: 39, commit: 264,name: "div",allChars: true,ranges: [][]int{{1, 1},{1, 1},},generalizations: []int{45,},};var p38 = charParser{id: 38,chars: []rune{47,},};p39.items = []parser{&p38,};p45.options = []parser{&p37,&p39,};p49.items = []parser{&p45,&p63,&p47,};var p50 = sequenceParser{id: 50, commit: 2,ranges: [][]int{{0, -1},{1, 1},},};p50.items = []parser{&p63,&p49,};p51.items = []parser{&p47,&p63,&p49,&p50,};var p54 = sequenceParser{id: 54, commit: 256,name: "binary1",ranges: [][]int{{1, 1},{0, -1},{1, 1},{0, -1},},generalizations: []int{55,56,64,},};var p48 = choiceParser{id: 48, commit: 258,name: "operand1",};p48.options = []parser{&p47,&p51,};var p52 = sequenceParser{id: 52, commit: 2,ranges: [][]int{{1, 1},{0, -1},{1, 1},},};var p46 = choiceParser{id: 46, commit: 258,name: "op1",};var p33 = sequenceParser{id: 33, commit: 264,name: "add",allChars: true,ranges: [][]int{{1, 1},{1, 1},},generalizations: []int{46,},};var p32 = charParser{id: 32,chars: []rune{43,},};p33.items = []parser{&p32,};var p35 = sequenceParser{id: 35, commit: 264,name: "sub",allChars: true,ranges: [][]int{{1, 1},{1, 1},},generalizations: []int{46,},};var p34 = charParser{id: 34,chars: []rune{45,},};p35.items = []parser{&p34,};p46.options = []parser{&p33,&p35,};p52.items = []parser{&p46,&p63,&p48,};var p53 = sequenceParser{id: 53, commit: 2,ranges: [][]int{{0, -1},{1, 1},},};p53.items = []parser{&p63,&p52,};p54.items = 
[]parser{&p48,&p63,&p52,&p53,};p55.options = []parser{&p51,&p54,};p56.options = []parser{&p31,&p44,&p55,};var p61 = sequenceParser{id: 61, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},{1, 1},{1, 1},{1, 1},{1, 1},{1, 1},{1, 1},},generalizations: []int{64,},};var p57 = charParser{id: 57,chars: []rune{101,},};var p58 = charParser{id: 58,chars: []rune{120,},};var p59 = charParser{id: 59,chars: []rune{105,},};var p60 = charParser{id: 60,chars: []rune{116,},};p61.items = []parser{&p57,&p58,&p59,&p60,};p64.options = []parser{&p56,&p61,};p65.items = []parser{&p63,&p64,&p63,};var b65 = sequenceBuilder{id: 65, commit: 128,name: "result",ranges: [][]int{{0, -1},{1, 1},{0, -1},},};var b63 = choiceBuilder{id: 63, commit: 2,};var b62 = choiceBuilder{id: 62, commit: 262,generalizations: []int{63,},};var b2 = sequenceBuilder{id: 2, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},generalizations: []int{62,63,},};var b1 = charBuilder{};b2.items = []builder{&b1,};var b4 = sequenceBuilder{id: 4, commit: 2,allChars: true,ranges: [][]int{{1, 1},},generalizations: []int{62,63,},};var b3 = charBuilder{};b4.items = []builder{&b3,};var b6 = sequenceBuilder{id: 6, commit: 2,allChars: true,ranges: [][]int{{1, 1},},generalizations: []int{62,63,},};var b5 = charBuilder{};b6.items = []builder{&b5,};var b8 = sequenceBuilder{id: 8, commit: 2,allChars: true,ranges: [][]int{{1, 1},},generalizations: []int{62,63,},};var b7 = charBuilder{};b8.items = []builder{&b7,};b62.options = []builder{&b2,&b4,&b6,&b8,};b63.options = []builder{&b62,};var b64 = choiceBuilder{id: 64, commit: 258,};var b56 = choiceBuilder{id: 56, commit: 258,generalizations: []int{64,},};var b31 = sequenceBuilder{id: 31, commit: 264,name: "num",ranges: [][]int{{0, 1},{1, 1},{0, 1},{0, 1},{0, 1},{1, 1},{0, 1},{0, 1},},generalizations: []int{56,47,48,64,},};var b10 = sequenceBuilder{id: 10, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var b9 = charBuilder{};b10.items = []builder{&b9,};var b18 = 
choiceBuilder{id: 18, commit: 10,};var b12 = sequenceBuilder{id: 12, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},generalizations: []int{18,},};var b11 = charBuilder{};b12.items = []builder{&b11,};var b17 = sequenceBuilder{id: 17, commit: 10,ranges: [][]int{{1, 1},{0, -1},{1, 1},{0, -1},},generalizations: []int{18,},};var b14 = sequenceBuilder{id: 14, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var b13 = charBuilder{};b14.items = []builder{&b13,};var b16 = sequenceBuilder{id: 16, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var b15 = charBuilder{};b16.items = []builder{&b15,};b17.items = []builder{&b14,&b16,};b18.options = []builder{&b12,&b17,};var b23 = sequenceBuilder{id: 23, commit: 10,ranges: [][]int{{1, 1},{1, -1},{1, 1},{1, -1},},};var b20 = sequenceBuilder{id: 20, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var b19 = charBuilder{};b20.items = []builder{&b19,};var b22 = sequenceBuilder{id: 22, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var b21 = charBuilder{};b22.items = []builder{&b21,};b23.items = []builder{&b20,&b22,};var b30 = sequenceBuilder{id: 30, commit: 10,ranges: [][]int{{1, 1},{0, 1},{1, -1},{1, 1},{0, 1},{1, -1},},};var b25 = sequenceBuilder{id: 25, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var b24 = charBuilder{};b25.items = []builder{&b24,};var b27 = sequenceBuilder{id: 27, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var b26 = charBuilder{};b27.items = []builder{&b26,};var b29 = sequenceBuilder{id: 29, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var b28 = charBuilder{};b29.items = []builder{&b28,};b30.items = []builder{&b25,&b27,&b29,};b31.items = []builder{&b10,&b18,&b23,&b30,};var b44 = sequenceBuilder{id: 44, commit: 258,ranges: [][]int{{1, 1},{0, -1},{1, 1},{0, -1},{1, 1},},generalizations: []int{56,47,48,64,},};var b41 = sequenceBuilder{id: 41, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var b40 = 
charBuilder{};b41.items = []builder{&b40,};var b43 = sequenceBuilder{id: 43, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},},};var b42 = charBuilder{};b43.items = []builder{&b42,};b44.items = []builder{&b41,&b63,&b56,&b63,&b43,};var b55 = choiceBuilder{id: 55, commit: 258,generalizations: []int{56,64,},};var b51 = sequenceBuilder{id: 51, commit: 256,name: "binary0",ranges: [][]int{{1, 1},{0, -1},{1, 1},{0, -1},},generalizations: []int{55,48,56,64,},};var b47 = choiceBuilder{id: 47, commit: 258,generalizations: []int{48,},};b47.options = []builder{&b31,&b44,};var b49 = sequenceBuilder{id: 49, commit: 2,ranges: [][]int{{1, 1},{0, -1},{1, 1},},};var b45 = choiceBuilder{id: 45, commit: 258,};var b37 = sequenceBuilder{id: 37, commit: 264,name: "mul",allChars: true,ranges: [][]int{{1, 1},{1, 1},},generalizations: []int{45,},};var b36 = charBuilder{};b37.items = []builder{&b36,};var b39 = sequenceBuilder{id: 39, commit: 264,name: "div",allChars: true,ranges: [][]int{{1, 1},{1, 1},},generalizations: []int{45,},};var b38 = charBuilder{};b39.items = []builder{&b38,};b45.options = []builder{&b37,&b39,};b49.items = []builder{&b45,&b63,&b47,};var b50 = sequenceBuilder{id: 50, commit: 2,ranges: [][]int{{0, -1},{1, 1},},};b50.items = []builder{&b63,&b49,};b51.items = []builder{&b47,&b63,&b49,&b50,};var b54 = sequenceBuilder{id: 54, commit: 256,name: "binary1",ranges: [][]int{{1, 1},{0, -1},{1, 1},{0, -1},},generalizations: []int{55,56,64,},};var b48 = choiceBuilder{id: 48, commit: 258,};b48.options = []builder{&b47,&b51,};var b52 = sequenceBuilder{id: 52, commit: 2,ranges: [][]int{{1, 1},{0, -1},{1, 1},},};var b46 = choiceBuilder{id: 46, commit: 258,};var b33 = sequenceBuilder{id: 33, commit: 264,name: "add",allChars: true,ranges: [][]int{{1, 1},{1, 1},},generalizations: []int{46,},};var b32 = charBuilder{};b33.items = []builder{&b32,};var b35 = sequenceBuilder{id: 35, commit: 264,name: "sub",allChars: true,ranges: [][]int{{1, 1},{1, 1},},generalizations: 
[]int{46,},};var b34 = charBuilder{};b35.items = []builder{&b34,};b46.options = []builder{&b33,&b35,};b52.items = []builder{&b46,&b63,&b48,};var b53 = sequenceBuilder{id: 53, commit: 2,ranges: [][]int{{0, -1},{1, 1},},};b53.items = []builder{&b63,&b52,};b54.items = []builder{&b48,&b63,&b52,&b53,};b55.options = []builder{&b51,&b54,};b56.options = []builder{&b31,&b44,&b55,};var b61 = sequenceBuilder{id: 61, commit: 10,allChars: true,ranges: [][]int{{1, 1},{1, 1},{1, 1},{1, 1},{1, 1},{1, 1},{1, 1},{1, 1},},generalizations: []int{64,},};var b57 = charBuilder{};var b58 = charBuilder{};var b59 = charBuilder{};var b60 = charBuilder{};b61.items = []builder{&b57,&b58,&b59,&b60,};b64.options = []builder{&b56,&b61,};b65.items = []builder{&b63,&b64,&b63,}; + +var keywords = []parser{} + +return parseInput(r, &p65, &b65, keywords) +} diff --git a/examples/json.treerack b/docs/examples/json.treerack similarity index 100% rename from examples/json.treerack rename to docs/examples/json.treerack diff --git a/examples/keyval.treerack b/docs/examples/keyval.treerack similarity index 100% rename from examples/keyval.treerack rename to docs/examples/keyval.treerack diff --git a/examples/mml-exp.treerack b/docs/examples/mml-exp.treerack similarity index 100% rename from examples/mml-exp.treerack rename to docs/examples/mml-exp.treerack diff --git a/examples/mml-exp2.treerack b/docs/examples/mml-exp2.treerack similarity index 100% rename from examples/mml-exp2.treerack rename to docs/examples/mml-exp2.treerack diff --git a/examples/mml-exp3.treerack b/docs/examples/mml-exp3.treerack similarity index 100% rename from examples/mml-exp3.treerack rename to docs/examples/mml-exp3.treerack diff --git a/examples/mml.treerack b/docs/examples/mml.treerack similarity index 100% rename from examples/mml.treerack rename to docs/examples/mml.treerack diff --git a/examples/scheme.treerack b/docs/examples/scheme.treerack similarity index 100% rename from examples/scheme.treerack rename to 
docs/examples/scheme.treerack diff --git a/examples/sexpr.treerack b/docs/examples/sexpr.treerack similarity index 100% rename from examples/sexpr.treerack rename to docs/examples/sexpr.treerack diff --git a/examples/test.mml b/docs/examples/test.mml similarity index 100% rename from examples/test.mml rename to docs/examples/test.mml diff --git a/docs/manual.md b/docs/manual.md new file mode 100644 index 0000000..8b0b204 --- /dev/null +++ b/docs/manual.md @@ -0,0 +1,629 @@ +# Treerack Manual + +This manual describes the primary use cases and workflows supported by Treerack. + +## Prerequisites + +We assume a working installation of the standard Go tooling. + +This manual relies on the treerack command-line tool. We can install it using one of the following methods. + +**A. source installation (requires make):** + +1. clone the repository `git clone https://code.squareroundforest.org/arpio/treerack` +2. navigate to the source directory, run: `make install`. To install it to a custom location, use the `prefix` +environment variable, e.g. run `prefix=~/.local make install` +3. verify the installation: run `treerack version` and `man treerack` + +**B. via go install:** + +Alternatively, we _may be able to_ install directly using the Go toolchain: + +1. run `go install code.squareroundforest.org/arpio/treerack/cmd/treerack` +2. verify: `treerack help` + +## Hello syntax + +A basic syntax definition looks like this: + +``` +hello = "Hello, world!" +``` + +This definition matches only the exact string "Hello, world!" and nothing else. To test the validity of this +rule, run: + +``` +treerack check-syntax --syntax-string 'hello = "Hello, world!"' +``` + +If successful, the command exits silently with code 0. (We can append `&& echo ok` to advertise successful +execution). + +To test the syntax against actual input content: + +``` +treerack check --syntax-string 'hello = "Hello, world!"' --input-string 'Hello, world!' 
+``` + +To visualize the resulting Abstract Syntax Tree (AST), use the show subcommand: + +``` +treerack show --syntax-string 'hello = "Hello, world!"' --input-string 'Hello, world!' +``` + +The output will be raw JSON: + +``` +{"name":"hello","from":0,"to":13,"text":"Hello, world!"} +``` + +For a more readable output, add the --pretty flag: + +``` +treerack show --pretty --syntax-string 'hello = "Hello, world!"' --input-string 'Hello, world!' +``` + +...then the output will look like this: + +``` +{ + "name": "hello", + "from": 0, + "to": 13, + "text": "Hello, world!" +} +``` + +### Handling errors + +If our syntax definition is invalid, check-syntax will fail: + +``` +treerack check-syntax --syntax-string 'foo = bar' +``` + +The above command will fail because the parser called foo references an undefined parser bar. + +We can use check or show to detect when the input content does not match a valid syntax. Using the hello syntax, +we can try the following: + +``` +treerack check --syntax-string 'hello = "Hello, world!"' --input-string 'Hi!' +``` + +It will show that parsing the input failed and that it failed while using the parser hello. + +## Basic syntax - An arithmetic calculator + +In this section, we will build a basic arithmetic calculator. It will read a line from standard input, parse it +as an arithmetic expression, compute the result, and print it—effectively creating a REPL (Read-Eval-Print +Loop). + +We will support addition +, subtraction -, multiplication *, division /, and grouping with parentheses (). + +acalc.treerack: + +``` +// Define whitespace characters. +// The :ws flag marks this as the global whitespace handler. +ignore:ws = " " | [\t] | [\r] | [\n]; + +// Define the number format. +// +// The :nows flag ensures we do not skip whitespace *inside* the number token. We support integers, floats, and +// scientific notation (e.g., 1.5e3). Arbitrary leading zeros are disallowed to prevent confusion with octal +// literals. +num:nows = "-"? 
("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? [0-9]+)?; + +// define the supported operators: +add = "+"; +sub = "-"; +mul = "*"; +div = "/"; + +// Grouping logic. +// +// Expressions can be enclosed in parentheses. This references 'expression', which is defined later, +// demonstrating recursive definitions. The :alias flag prevents 'group' from creating its own node in the AST; +// only the child 'expression' will appear. +group:alias = "(" expression ")"; + +// Operator Precedence. +// +// We group operators by precedence levels to ensure correct order of operations. +// +// Level 0 (High): Multiplication/Division +op0:alias = mul | div; + +// Level 1 (Low): Addition/Subtraction +op1:alias = add | sub; + +// Operands for each precedence level. +// +// operand0 can be a raw number or a grouped expression. +operand0:alias = num | group; + +// operand1 can be a higher-precedence operand or a completed binary0 operation. +operand1:alias = operand0 | binary0; + +// Binary Expressions. +// +// We define these hierarchically. 'binary0' handles high-precedence operations (mul/div). +binary0 = operand0 (op0 operand0)+; +binary1 = operand1 (op1 operand1)+; +binary:alias = binary0 | binary1; + +// The generalized Expression. +// +// An expression is either a raw number, a group, or a binary operation. +expression:alias = num | group | binary; + +// Root Definition. +// +// The final result is either a valid expression or the "exit" command. Since 'expression' is an alias, we need +// a concrete root parser to anchor the AST. Note: The :root flag is optional here because this is the last +// definition in the file. +result = expression | "exit" +``` + +### Testing the syntax + +#### 1. Simple number + +``` +treerack show --pretty --syntax acalc.treerack --input-string 42 +``` + +Output: + +``` +{ + "name": "result", + "from": 0, + "to": 2, + "nodes": [ + { + "name": "num", + "from": 0, + "to": 2, + "text": "42" + } + ] +} +``` + +#### 2. 
Basic operation + +``` +treerack show --pretty --syntax acalc.treerack --input-string "42 + 24" +``` + +Output: + +``` +{ + "name": "expression", + "from": 0, + "to": 7, + "nodes": [ + { + "name": "binary1", + "from": 0, + "to": 7, + "nodes": [ + { + "name": "num", + "from": 0, + "to": 2, + "text": "42" + }, + { + "name": "add", + "from": 3, + "to": 4, + "text": "+" + }, + { + "name": "num", + "from": 5, + "to": 7, + "text": "24" + } + ] + } + ] +} +``` + +#### 3. Precedence check + +``` +treerack show --pretty --syntax acalc.treerack --input-string "42 + 24 * 2" +``` + +Output: + +``` +{ + "name": "result", + "from": 0, + "to": 11, + "nodes": [ + { + "name": "binary1", + "from": 0, + "to": 11, + "nodes": [ + { + "name": "num", + "from": 0, + "to": 2, + "text": "42" + }, + { + "name": "add", + "from": 3, + "to": 4, + "text": "+" + }, + { + "name": "binary0", + "from": 5, + "to": 11, + "nodes": [ + { + "name": "num", + "from": 5, + "to": 7, + "text": "24" + }, + { + "name": "mul", + "from": 8, + "to": 9, + "text": "*" + }, + { + "name": "num", + "from": 10, + "to": 11, + "text": "2" + } + ] + } + ] + } + ] +} +``` + +#### 4. 
Grouping override + +``` +treerack show --pretty --syntax acalc.treerack --input-string "(42 + 24) * 2" +``` + +Notice how the 'group' alias node is not present, but now the expression of the addition is a factor in the +multiplication: + +``` +{ + "name": "result", + "from": 0, + "to": 13, + "nodes": [ + { + "name": "binary0", + "from": 0, + "to": 13, + "nodes": [ + { + "name": "binary1", + "from": 1, + "to": 8, + "nodes": [ + { + "name": "num", + "from": 1, + "to": 3, + "text": "42" + }, + { + "name": "add", + "from": 4, + "to": 5, + "text": "+" + }, + { + "name": "num", + "from": 6, + "to": 8, + "text": "24" + } + ] + }, + { + "name": "mul", + "from": 10, + "to": 11, + "text": "*" + }, + { + "name": "num", + "from": 12, + "to": 13, + "text": "2" + } + ] + } + ] +} +``` + +## Generator - Implementing the calculator + +We will now generate the Go parser code and integrate it into a CLI application. + +Initialize the project: + +``` +go mod init acalc && go mod tidy +``` + +Generate the parser: + +``` +treerack generate --syntax acalc.treerack > parser.go +``` + +Implement the application logic in main.go. + +main.go: + +``` +package main + +import ( + "bufio" + "bytes" + "encoding/json" + "errors" + "fmt" + "io" + "log" + "os" + "strings" +) + +var errExit = errors.New("exit") + +// repl runs the Read-Eval-Print Loop. 
+func repl(input io.Reader, output io.Writer) { + + // use buffered io, to be able to read the input line-by-line: + buf := bufio.NewReader(os.Stdin) + + // our REPL loop: + for { + // print a basic prompt: + if _, err := output.Write([]byte("> ")); err != nil { + log.Fatalln(err) + } + + // read the input and handle the errors: + expr, err := read(buf) + + // Handle EOF (Ctrl+D) + if errors.Is(err, io.EOF) { + output.Write([]byte{'\n'}) + os.Exit(0) + } + + // Handle explicit exit command + if errors.Is(err, errExit) { + os.Exit(0) + } + + // Handle parser errors (allow user to retry) + var perr *parseError + if errors.As(err, &perr) { + log.Println(err) + continue + } + + if err != nil { + log.Fatalln(err) + } + + // Evaluate and print + result := eval(expr) + if err := print(output, result); err != nil { + log.Fatalln(err) + } + } +} + +func read(input *bufio.Reader) (*node, error) { + line, err := input.ReadString('\n') + if err != nil { + return nil, err + } + + // Parse the line using the generated parser + expr, err := parse(bytes.NewBufferString(line)) + if err != nil { + return nil, err + } + + if strings.TrimSpace(expr.Text()) == "exit" { + return nil, errExit + } + + // Based on our syntax, the root node always has exactly one child: + // either a number or a binary operation. + return expr.Nodes[0], nil +} + +// eval always returns the calculated result as a float64: +func eval(expr *node) float64 { + var value float64 + switch expr.Name { + case "num": + + // the number format in our syntax is based on the JSON spec, so we can piggy-back on it for the number + // parsing. In a real application, we would need to handle the errors here anyway, even if our parser + // already validated the input: + json.Unmarshal([]byte(expr.Text()), &value) + return value + default: + + // Handle binary expressions (recursively) + // Format: Operand [Operator Operand]... 
+ value, expr.Nodes = eval(expr.Nodes[0]), expr.Nodes[1:] + for len(expr.Nodes) > 0 { + var ( + operator string + operand float64 + ) + + operator, operand, expr.Nodes = expr.Nodes[0].Name, eval(expr.Nodes[1]), expr.Nodes[2:] + switch operator { + case "add": + value += operand + case "sub": + value -= operand + case "mul": + value *= operand + case "div": + value /= operand // Go handles division by zero as ±Inf + } + } + } + + return value +} + +func print(output io.Writer, result float64) error { + _, err := fmt.Fprintln(output, result) + return err +} + +func main() { + // for testability, we define the REPL loop in a separate function so that the test code can call it with + // in-memory buffers as input and output. Our main function calls it with the stdio handles: + repl(os.Stdin, os.Stdout) +} +``` + +### Running the calculator + +Our arithmetic calculator is now ready. We can run it via `go run .`. An example session may look like this: + +``` +$ go run . +> (42 + 24) * 2 +132 +> 42 + 24 * 2 +90 +> 1 + 2 + 3 +6 +> exit +``` + +We can find the source files for this example here: [./examples/acalc](./examples/acalc). + +## Important Note: Unescaping + +Treerack does not automatically handle escape sequences (e.g., converting \n to a literal newline). If our +syntax supports escaped characters—common in string literals—the user code is responsible for "unescaping" the +raw text from the AST node. + +This is analogous to how we needed to parse the numbers in the calculator example to convert the string +representation of a number into a Go float64. + +## Programmatically loading syntaxes + +While generating static code via treerack generate is the recommended approach, we can also load definitions +dynamically at runtime. 
+ +``` +package parser + +import ( + "io" + "code.squareroundforest.org/arpio/treerack" +) + +func initAndParse(syntax, content io.Reader) (*treerack.Node, error) { + s := &treerack.Syntax{} + if err := s.ReadSyntax(syntax); err != nil { + return nil, err + } + + if err := s.Init(); err != nil { + return nil, err + } + + return s.Parse(content) +} +``` + +Caution: Be mindful of security implications when loading syntax definitions from untrusted sources. + +## Programmatically defining syntaxes + +In rare cases where a syntax must be constructed computationally, we can define rules via the Go API: + +``` +package parser + +import ( + "io" + "code.squareroundforest.org/arpio/treerack" +) + +func initAndParse(content io.Reader) (*treerack.Node, error) { + s := &treerack.Syntax{} + + // whitespace: + s.Class("whitespace-chars", treerack.Alias, false, []rune{' ', '\t', '\r', '\n'}, nil) + s.Choice("whitespace", treerack.Whitespace, "whitespace-chars") + + s.Class("digit", treerack.Alias, false, nil, [][]rune{{'0', '9'}}) + s.Sequence("number", treerack.NoWhitespace, treerack.SequenceItem{Name: "digit", Min: 1}) + s.Class("operator", treerack.None, false, []rune{'+', '-'}, nil) + s.Sequence( + "expression", + treerack.Root, + treerack.SequenceItem{Name: "number"}, + treerack.SequenceItem{Name: "operator"}, + treerack.SequenceItem{Name: "number"}, + ) + + if err := s.Init(); err != nil { + return nil, err + } + + return s.Parse(content) +} +``` + +## Summary + +We have demonstrated how to use the Treerack tool to define, test, and implement a parser. We recommend the +following workflow: + +1. draft: define a syntax in a .treerack file. +2. verify: use `treerack check` and `treerack show` to validate building blocks incrementally. +3. generate: use `treerack generate` to create embeddable Go code. 
+ +**Links:** + +- the detailed documentation of the treerack definition language: [./syntax.md](./syntax.md) +- treerack command help: [../cmd/treerack/readme.md](../cmd/treerack/readme.md) or, if the command is installed, + `man treerack`, or `path/to/treerack help` +- the arithmetic calculator example: [./examples/acalc](./examples/acalc). +- additional examples: [./examples](./examples) + +Happy parsing! diff --git a/docs/syntax.md b/docs/syntax.md new file mode 100644 index 0000000..3af0c06 --- /dev/null +++ b/docs/syntax.md @@ -0,0 +1,121 @@ +# Treerack Syntax Definition Language + +The Treerack library uses a custom grammar description language derived from EBNF (Extended Backus-Naur Form). +It allows for the concise definition of recursive descent parsers. + +A syntax file consists of a series of Production Rules (definitions), terminated by semicolons. + +## Production Rules + +A rule assigns a name to a pattern expression. Rules may include optional flags to modify the parser's behavior +or the resulting AST (Abstract Syntax Tree). + +``` +RuleName = Expression; +RuleName:flag1:flag2 = Expression; +``` + +## Flags + +Flags are appended to the rule name, separated by colons. They control AST generation, whitespace handling, and +error propagation. + +- `alias`: Transparent Node. The rule validates input but does not create its own node in the AST. Children + nodes (if any) are attached to the parent of this rule. +- `ws`: Global Whitespace. Marks this rule as the designated whitespace handler. The parser will attempt to + match (and discard) this rule between tokens throughout the entire syntax. +- `nows`: No Whitespace. Disables automatic whitespace skipping inside this rule. Useful for defining tokens + like string literals where spaces are significant. +- `root`: Entry Point. Explicitly marks the rule as the starting point of the syntax. If omitted, the last + defined rule is implied to be the root. +- `kw`: Keyword. 
Marks the content as a reserved keyword. +- `nokw`: No Keyword. Prevents the rule from matching text that matches a defined kw rule. Essential for + distinguishing identifiers from keywords (e.g., ensuring var is not parsed as a variable name). +- `failpass`: Pass Failure. If this rule fails to parse, the error is reported as a failure of the parent rule, + not this specific rule. + +## Expressions + +Expressions define the structure of the text to be parsed. They are composed of terminals, sequences, choices, +and quantifiers. + +## Terminals + +Terminals match specific characters or strings in the input. + +- `"abc"` (string): Matches an exact sequence of characters. +- `.` (any char): Matches any single character (wildcard). +- `[123]`, `[a-z]`, `[123a-z]` (class): Matches a single character from a set or range. +- `[^123]`, `[^a-z]`, `[^123a-z]` (not class): Matches any single character not in the set. + +## Quantifiers + +Quantifiers determine how many times an item must match. They are placed immediately after the item they modify. + +- `?`: Optional (Zero or one). +- `*`: Zero or more. +- `+`: One or more. +- `{n}`: Exact count. Matches exactly n times. +- `{n,}`: At least. Matches n or more times. +- `{,m}`: At most. Matches between 0 and m times. +- `{n,m}`: Range. Matches between n and m times. + +## Composites + +Complex patterns are built by combining terminals and other rules. + +### 1. Sequences + +Items written consecutively are matched in order. + +``` +// Matches "A", then "B", then "C" +MySequence = "A" "B" "C"; +``` + +### 2. Grouping + +Parentheses (...) group items together, allowing quantifiers to apply to the entire group. + +``` +// Matches "AB", "ABAB", "ABABAB"... +MyGroup = ("A" "B")+; +``` + +### 3. Choices + +The pipe | character represents a choice between alternatives. + +The parser evaluates all provided options against the input at the current position and selects the best match +based on the following priority rules: + +1. 
_Longest Match_: The option that consumes the largest number of characters takes priority. This eliminates the +need to manually order specific matches before general ones (e.g., "integer" will always be chosen over "int" if +the input supports it, regardless of their order in the definition). +2. _First Definition Wins_: If multiple options consume the exact same number of characters, the option defined +first (left-most) in the list takes priority. + +``` +// Longest match wins automatically: +// Input "integer" is matched by 'type', even though "int" comes first. +type = "int" | "integer"; + +// Tie-breaker rule: +// If input is "foo", both options match 3 characters. +// Because 'identifier' is last, it takes priority over 'keyword'. +// (Use :kw and :nokw to control such situations, when it applies.) +content = keyword | identifier; +``` + +## Comments + +Comments follow C-style syntax and are ignored by the definition parser. + +- Line comments: Start with // and end at the newline. +- Block comments: Enclosed in /* ... */. 
+ +## Examples + +- [JSON](examples/json.treerack) +- [Scheme](examples/scheme.treerack) +- [Treerack (itself)](../syntax.treerack) diff --git a/escape.go b/escape.go index 359c73e..d442b2c 100644 --- a/escape.go +++ b/escape.go @@ -61,33 +61,3 @@ func unescapeChar(c rune) rune { return c } } - -func unescape(escape rune, banned, chars []rune) ([]rune, error) { - var ( - unescaped []rune - escaped bool - ) - - for _, ci := range chars { - if escaped { - unescaped = append(unescaped, unescapeChar(ci)) - escaped = false - continue - } - - switch { - case ci == escape: - escaped = true - case runesContain(banned, ci): - return nil, ErrInvalidEscapeCharacter - default: - unescaped = append(unescaped, ci) - } - } - - if escaped { - return nil, ErrInvalidEscapeCharacter - } - - return unescaped, nil -} diff --git a/escape_test.go b/escape_test.go index 29f7e05..dab6555 100644 --- a/escape_test.go +++ b/escape_test.go @@ -2,33 +2,8 @@ package treerack import "testing" -func TestUnescape(t *testing.T) { - t.Run("char should be escaped", func(t *testing.T) { - if _, err := unescape('\\', []rune{'a'}, []rune{'a'}); err == nil { - t.Error("failed to fail") - } - }) - - t.Run("finished with escape char", func(t *testing.T) { - if _, err := unescape('\\', []rune{'a'}, []rune{'b', '\\'}); err == nil { - t.Error("failed to fail") - } - }) - - t.Run("unescapes", func(t *testing.T) { - u, err := unescape('\\', []rune{'a'}, []rune{'b', '\\', 'a'}) - if err != nil { - t.Error(err) - return - } - - if string(u) != "ba" { - t.Error("unescape failed") - } - }) -} - func TestEscape(t *testing.T) { + t.Skip() const ( banned = "\b\f\n\r\t\v" unescaped = "\b\f\n\r\t\v" diff --git a/head.go b/head.go index 7804c45..694248d 100644 --- a/head.go +++ b/head.go @@ -1,4 +1,4 @@ package treerack // generated with scripts/createhead.go -const headCode = "import (\n\t\"strconv\"\n\t\"errors\"\n\t\"io\"\n\t\"strings\"\n\t\"unicode\"\n\t\"fmt\"\n\t\"bufio\"\n)\n\ntype charParser struct 
{\n\tname\tstring\n\tid\tint\n\tnot\tbool\n\tchars\t[]rune\n\tranges\t[][]rune\n}\ntype charBuilder struct {\n\tname\tstring\n\tid\tint\n}\n\nfunc (p *charParser) nodeName() string {\n\treturn p.name\n}\nfunc (p *charParser) nodeID() int {\n\treturn p.id\n}\nfunc (p *charParser) commitType() commitType {\n\treturn alias\n}\nfunc matchChar(chars []rune, ranges [][]rune, not bool, char rune) bool {\n\tfor _, ci := range chars {\n\t\tif ci == char {\n\t\t\treturn !not\n\t\t}\n\t}\n\tfor _, ri := range ranges {\n\t\tif char >= ri[0] && char <= ri[1] {\n\t\t\treturn !not\n\t\t}\n\t}\n\treturn not\n}\nfunc (p *charParser) match(t rune) bool {\n\treturn matchChar(p.chars, p.ranges, p.not, t)\n}\nfunc (p *charParser) parse(c *context) {\n\tif tok, ok := c.token(); !ok || !p.match(tok) {\n\t\tif c.offset > c.failOffset {\n\t\t\tc.failOffset = c.offset\n\t\t\tc.failingParser = nil\n\t\t}\n\t\tc.fail(c.offset)\n\t\treturn\n\t}\n\tc.success(c.offset + 1)\n}\nfunc (b *charBuilder) nodeName() string {\n\treturn b.name\n}\nfunc (b *charBuilder) nodeID() int {\n\treturn b.id\n}\nfunc (b *charBuilder) build(c *context) ([]*node, bool) {\n\treturn nil, false\n}\n\ntype sequenceParser struct {\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tcommitType\n\titems\t\t[]parser\n\tranges\t\t[][]int\n\tgeneralizations\t[]int\n\tallChars\tbool\n}\ntype sequenceBuilder struct {\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tcommitType\n\titems\t\t[]builder\n\tranges\t\t[][]int\n\tgeneralizations\t[]int\n\tallChars\tbool\n}\n\nfunc (p *sequenceParser) nodeName() string {\n\treturn p.name\n}\nfunc (p *sequenceParser) nodeID() int {\n\treturn p.id\n}\nfunc (p *sequenceParser) commitType() commitType {\n\treturn p.commit\n}\nfunc (p *sequenceParser) parse(c *context) {\n\tif !p.allChars {\n\t\tif c.results.pending(c.offset, p.id) {\n\t\t\tc.fail(c.offset)\n\t\t\treturn\n\t\t}\n\t\tc.results.markPending(c.offset, p.id)\n\t}\n\tvar (\n\t\tcurrentCount\tint\n\t\tparsed\t\tbool\n\t)\n\titemIndex := 
0\n\tfrom := c.offset\n\tto := c.offset\n\tfor itemIndex < len(p.items) {\n\t\tp.items[itemIndex].parse(c)\n\t\tif !c.matchLast {\n\t\t\tif currentCount >= p.ranges[itemIndex][0] {\n\t\t\t\titemIndex++\n\t\t\t\tcurrentCount = 0\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tc.offset = from\n\t\t\tif c.fromResults(p) {\n\t\t\t\tif to > c.failOffset {\n\t\t\t\t\tc.failOffset = -1\n\t\t\t\t\tc.failingParser = nil\n\t\t\t\t}\n\t\t\t\tif !p.allChars {\n\t\t\t\t\tc.results.unmarkPending(from, p.id)\n\t\t\t\t}\n\t\t\t\treturn\n\t\t\t}\n\t\t\tif c.failingParser == nil && p.commit&userDefined != 0 && p.commit&whitespace == 0 && p.commit&failPass == 0 {\n\t\t\t\tc.failingParser = p\n\t\t\t}\n\t\t\tc.fail(from)\n\t\t\tif !p.allChars {\n\t\t\t\tc.results.unmarkPending(from, p.id)\n\t\t\t}\n\t\t\treturn\n\t\t}\n\t\tparsed = c.offset > to\n\t\tif parsed {\n\t\t\tcurrentCount++\n\t\t}\n\t\tto = c.offset\n\t\tif !parsed || p.ranges[itemIndex][1] > 0 && currentCount == p.ranges[itemIndex][1] {\n\t\t\titemIndex++\n\t\t\tcurrentCount = 0\n\t\t}\n\t}\n\tif p.commit&noKeyword != 0 && c.isKeyword(from, to) {\n\t\tif c.failingParser == nil && p.commit&userDefined != 0 && p.commit&whitespace == 0 && p.commit&failPass == 0 {\n\t\t\tc.failingParser = p\n\t\t}\n\t\tc.fail(from)\n\t\tif !p.allChars {\n\t\t\tc.results.unmarkPending(from, p.id)\n\t\t}\n\t\treturn\n\t}\n\tfor _, g := range p.generalizations {\n\t\tif c.results.pending(from, g) {\n\t\t\tc.results.setMatch(from, g, to)\n\t\t}\n\t}\n\tif to > c.failOffset {\n\t\tc.failOffset = -1\n\t\tc.failingParser = nil\n\t}\n\tc.results.setMatch(from, p.id, to)\n\tc.success(to)\n\tif !p.allChars {\n\t\tc.results.unmarkPending(from, p.id)\n\t}\n}\nfunc (b *sequenceBuilder) nodeName() string {\n\treturn b.name\n}\nfunc (b *sequenceBuilder) nodeID() int {\n\treturn b.id\n}\nfunc (b *sequenceBuilder) build(c *context) ([]*node, bool) {\n\tto, ok := c.results.longestMatch(c.offset, b.id)\n\tif !ok {\n\t\treturn nil, false\n\t}\n\tfrom := c.offset\n\tparsed := to 
> from\n\tif b.allChars {\n\t\tc.offset = to\n\t\tif b.commit&alias != 0 {\n\t\t\treturn nil, true\n\t\t}\n\t\treturn []*node{{Name: b.name, From: from, To: to, tokens: c.tokens}}, true\n\t} else if parsed {\n\t\tc.results.dropMatchTo(c.offset, b.id, to)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.dropMatchTo(c.offset, g, to)\n\t\t}\n\t} else {\n\t\tif c.results.pending(c.offset, b.id) {\n\t\t\treturn nil, false\n\t\t}\n\t\tc.results.markPending(c.offset, b.id)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.markPending(c.offset, g)\n\t\t}\n\t}\n\tvar (\n\t\titemIndex\tint\n\t\tcurrentCount\tint\n\t\tnodes\t\t[]*node\n\t)\n\tfor itemIndex < len(b.items) {\n\t\titemFrom := c.offset\n\t\tn, ok := b.items[itemIndex].build(c)\n\t\tif !ok {\n\t\t\titemIndex++\n\t\t\tcurrentCount = 0\n\t\t\tcontinue\n\t\t}\n\t\tif c.offset > itemFrom {\n\t\t\tnodes = append(nodes, n...)\n\t\t\tcurrentCount++\n\t\t\tif b.ranges[itemIndex][1] > 0 && currentCount == b.ranges[itemIndex][1] {\n\t\t\t\titemIndex++\n\t\t\t\tcurrentCount = 0\n\t\t\t}\n\t\t\tcontinue\n\t\t}\n\t\tif currentCount < b.ranges[itemIndex][0] {\n\t\t\tfor i := 0; i < b.ranges[itemIndex][0]-currentCount; i++ {\n\t\t\t\tnodes = append(nodes, n...)\n\t\t\t}\n\t\t}\n\t\titemIndex++\n\t\tcurrentCount = 0\n\t}\n\tif !parsed {\n\t\tc.results.unmarkPending(from, b.id)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.unmarkPending(from, g)\n\t\t}\n\t}\n\tif b.commit&alias != 0 {\n\t\treturn nodes, true\n\t}\n\treturn []*node{{Name: b.name, From: from, To: to, Nodes: nodes, tokens: c.tokens}}, true\n}\n\ntype choiceParser struct {\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tcommitType\n\toptions\t\t[]parser\n\tgeneralizations\t[]int\n}\ntype choiceBuilder struct {\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tcommitType\n\toptions\t\t[]builder\n\tgeneralizations\t[]int\n}\n\nfunc (p *choiceParser) nodeName() string {\n\treturn p.name\n}\nfunc (p *choiceParser) nodeID() int {\n\treturn 
p.id\n}\nfunc (p *choiceParser) commitType() commitType {\n\treturn p.commit\n}\nfunc (p *choiceParser) parse(c *context) {\n\tif c.fromResults(p) {\n\t\treturn\n\t}\n\tif c.results.pending(c.offset, p.id) {\n\t\tc.fail(c.offset)\n\t\treturn\n\t}\n\tc.results.markPending(c.offset, p.id)\n\tvar (\n\t\tmatch\t\tbool\n\t\toptionIndex\tint\n\t\tfoundMatch\tbool\n\t\tfailingParser\tparser\n\t)\n\tfrom := c.offset\n\tto := c.offset\n\tinitialFailOffset := c.failOffset\n\tinitialFailingParser := c.failingParser\n\tfailOffset := initialFailOffset\n\tfor {\n\t\tfoundMatch = false\n\t\toptionIndex = 0\n\t\tfor optionIndex < len(p.options) {\n\t\t\tp.options[optionIndex].parse(c)\n\t\t\toptionIndex++\n\t\t\tif !c.matchLast {\n\t\t\t\tif c.failOffset > failOffset {\n\t\t\t\t\tfailOffset = c.failOffset\n\t\t\t\t\tfailingParser = c.failingParser\n\t\t\t\t}\n\t\t\t}\n\t\t\tif !c.matchLast || match && c.offset <= to {\n\t\t\t\tc.offset = from\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tmatch = true\n\t\t\tfoundMatch = true\n\t\t\tto = c.offset\n\t\t\tc.offset = from\n\t\t\tc.results.setMatch(from, p.id, to)\n\t\t}\n\t\tif !foundMatch {\n\t\t\tbreak\n\t\t}\n\t}\n\tif match {\n\t\tif p.commit&noKeyword != 0 && c.isKeyword(from, to) {\n\t\t\tif c.failingParser == nil && p.commit&userDefined != 0 && p.commit&whitespace == 0 && p.commit&failPass == 0 {\n\t\t\t\tc.failingParser = p\n\t\t\t}\n\t\t\tc.fail(from)\n\t\t\tc.results.unmarkPending(from, p.id)\n\t\t\treturn\n\t\t}\n\t\tif failOffset > to {\n\t\t\tc.failOffset = failOffset\n\t\t\tc.failingParser = failingParser\n\t\t} else if to > initialFailOffset {\n\t\t\tc.failOffset = -1\n\t\t\tc.failingParser = nil\n\t\t} else {\n\t\t\tc.failOffset = initialFailOffset\n\t\t\tc.failingParser = initialFailingParser\n\t\t}\n\t\tc.success(to)\n\t\tc.results.unmarkPending(from, p.id)\n\t\treturn\n\t}\n\tif failOffset > initialFailOffset {\n\t\tc.failOffset = failOffset\n\t\tc.failingParser = failingParser\n\t\tif c.failingParser == nil && 
p.commitType()&userDefined != 0 && p.commitType()&whitespace == 0 && p.commitType()&failPass == 0 {\n\t\t\tc.failingParser = p\n\t\t}\n\t}\n\tc.results.setNoMatch(from, p.id)\n\tc.fail(from)\n\tc.results.unmarkPending(from, p.id)\n}\nfunc (b *choiceBuilder) nodeName() string {\n\treturn b.name\n}\nfunc (b *choiceBuilder) nodeID() int {\n\treturn b.id\n}\nfunc (b *choiceBuilder) build(c *context) ([]*node, bool) {\n\tto, ok := c.results.longestMatch(c.offset, b.id)\n\tif !ok {\n\t\treturn nil, false\n\t}\n\tfrom := c.offset\n\tparsed := to > from\n\tif parsed {\n\t\tc.results.dropMatchTo(c.offset, b.id, to)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.dropMatchTo(c.offset, g, to)\n\t\t}\n\t} else {\n\t\tif c.results.pending(c.offset, b.id) {\n\t\t\treturn nil, false\n\t\t}\n\t\tc.results.markPending(c.offset, b.id)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.markPending(c.offset, g)\n\t\t}\n\t}\n\tvar option builder\n\tfor _, o := range b.options {\n\t\tif c.results.hasMatchTo(c.offset, o.nodeID(), to) {\n\t\t\toption = o\n\t\t\tbreak\n\t\t}\n\t}\n\tn, _ := option.build(c)\n\tif !parsed {\n\t\tc.results.unmarkPending(from, b.id)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.unmarkPending(from, g)\n\t\t}\n\t}\n\tif b.commit&alias != 0 {\n\t\treturn n, true\n\t}\n\treturn []*node{{Name: b.name, From: from, To: to, Nodes: n, tokens: c.tokens}}, true\n}\n\ntype idSet struct{ ids []uint }\n\nfunc divModBits(id int) (int, int) {\n\treturn id / strconv.IntSize, id % strconv.IntSize\n}\nfunc (s *idSet) set(id int) {\n\td, m := divModBits(id)\n\tif d >= len(s.ids) {\n\t\tif d < cap(s.ids) {\n\t\t\ts.ids = s.ids[:d+1]\n\t\t} else {\n\t\t\ts.ids = s.ids[:cap(s.ids)]\n\t\t\tfor i := cap(s.ids); i <= d; i++ {\n\t\t\t\ts.ids = append(s.ids, 0)\n\t\t\t}\n\t\t}\n\t}\n\ts.ids[d] |= 1 << uint(m)\n}\nfunc (s *idSet) unset(id int) {\n\td, m := divModBits(id)\n\tif d >= len(s.ids) {\n\t\treturn\n\t}\n\ts.ids[d] &^= 1 << uint(m)\n}\nfunc (s 
*idSet) has(id int) bool {\n\td, m := divModBits(id)\n\tif d >= len(s.ids) {\n\t\treturn false\n\t}\n\treturn s.ids[d]&(1< offset {\n\t\treturn ints\n\t}\n\tif cap(ints) > offset {\n\t\tints = ints[:offset+1]\n\t\treturn ints\n\t}\n\tints = ints[:cap(ints)]\n\tfor i := len(ints); i <= offset; i++ {\n\t\tints = append(ints, nil)\n\t}\n\treturn ints\n}\nfunc ensureOffsetIDs(ids []*idSet, offset int) []*idSet {\n\tif len(ids) > offset {\n\t\treturn ids\n\t}\n\tif cap(ids) > offset {\n\t\tids = ids[:offset+1]\n\t\treturn ids\n\t}\n\tids = ids[:cap(ids)]\n\tfor i := len(ids); i <= offset; i++ {\n\t\tids = append(ids, nil)\n\t}\n\treturn ids\n}\nfunc (r *results) setMatch(offset, id, to int) {\n\tr.match = ensureOffsetInts(r.match, offset)\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id || r.match[offset][i+1] != to {\n\t\t\tcontinue\n\t\t}\n\t\treturn\n\t}\n\tr.match[offset] = append(r.match[offset], id, to)\n}\nfunc (r *results) setNoMatch(offset, id int) {\n\tif len(r.match) > offset {\n\t\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\t\tif r.match[offset][i] != id {\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\treturn\n\t\t}\n\t}\n\tr.noMatch = ensureOffsetIDs(r.noMatch, offset)\n\tif r.noMatch[offset] == nil {\n\t\tr.noMatch[offset] = &idSet{}\n\t}\n\tr.noMatch[offset].set(id)\n}\nfunc (r *results) hasMatchTo(offset, id, to int) bool {\n\tif len(r.match) <= offset {\n\t\treturn false\n\t}\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id {\n\t\t\tcontinue\n\t\t}\n\t\tif r.match[offset][i+1] == to {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\nfunc (r *results) longestMatch(offset, id int) (int, bool) {\n\tif len(r.match) <= offset {\n\t\treturn 0, false\n\t}\n\tvar found bool\n\tto := -1\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id {\n\t\t\tcontinue\n\t\t}\n\t\tif r.match[offset][i+1] > to {\n\t\t\tto = r.match[offset][i+1]\n\t\t}\n\t\tfound = 
true\n\t}\n\treturn to, found\n}\nfunc (r *results) longestResult(offset, id int) (int, bool, bool) {\n\tif len(r.noMatch) > offset && r.noMatch[offset] != nil && r.noMatch[offset].has(id) {\n\t\treturn 0, false, true\n\t}\n\tto, ok := r.longestMatch(offset, id)\n\treturn to, ok, ok\n}\nfunc (r *results) dropMatchTo(offset, id, to int) {\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id {\n\t\t\tcontinue\n\t\t}\n\t\tif r.match[offset][i+1] == to {\n\t\t\tr.match[offset][i] = -1\n\t\t\treturn\n\t\t}\n\t}\n}\nfunc (r *results) resetPending() {\n\tr.isPending = nil\n}\nfunc (r *results) pending(offset, id int) bool {\n\tif len(r.isPending) <= id {\n\t\treturn false\n\t}\n\tfor i := range r.isPending[id] {\n\t\tif r.isPending[id][i] == offset {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\nfunc (r *results) markPending(offset, id int) {\n\tr.isPending = ensureOffsetInts(r.isPending, id)\n\tfor i := range r.isPending[id] {\n\t\tif r.isPending[id][i] == -1 {\n\t\t\tr.isPending[id][i] = offset\n\t\t\treturn\n\t\t}\n\t}\n\tr.isPending[id] = append(r.isPending[id], offset)\n}\nfunc (r *results) unmarkPending(offset, id int) {\n\tfor i := range r.isPending[id] {\n\t\tif r.isPending[id][i] == offset {\n\t\t\tr.isPending[id][i] = -1\n\t\t\tbreak\n\t\t}\n\t}\n}\n\ntype context struct {\n\treader\t\tio.RuneReader\n\tkeywords\t[]parser\n\toffset\t\tint\n\treadOffset\tint\n\tconsumed\tint\n\toffsetLimit\tint\n\tfailOffset\tint\n\tfailingParser\tparser\n\treadErr\t\terror\n\teof\t\tbool\n\tresults\t\t*results\n\ttokens\t\t[]rune\n\tmatchLast\tbool\n}\n\nfunc newContext(r io.RuneReader, keywords []parser) *context {\n\treturn &context{reader: r, keywords: keywords, results: &results{}, offsetLimit: -1, failOffset: -1}\n}\nfunc (c *context) read() bool {\n\tif c.eof || c.readErr != nil {\n\t\treturn false\n\t}\n\ttoken, n, err := c.reader.ReadRune()\n\tif err != nil {\n\t\tif errors.Is(err, io.EOF) {\n\t\t\tif n == 0 {\n\t\t\t\tc.eof = 
true\n\t\t\t\treturn false\n\t\t\t}\n\t\t} else {\n\t\t\tc.readErr = err\n\t\t\treturn false\n\t\t}\n\t}\n\tc.readOffset++\n\tif token == unicode.ReplacementChar {\n\t\tc.readErr = errInvalidUnicodeCharacter\n\t\treturn false\n\t}\n\tc.tokens = append(c.tokens, token)\n\treturn true\n}\nfunc (c *context) token() (rune, bool) {\n\tif c.offset == c.offsetLimit {\n\t\treturn 0, false\n\t}\n\tif c.offset == c.readOffset {\n\t\tif !c.read() {\n\t\t\treturn 0, false\n\t\t}\n\t}\n\treturn c.tokens[c.offset], true\n}\nfunc (c *context) fromResults(p parser) bool {\n\tto, m, ok := c.results.longestResult(c.offset, p.nodeID())\n\tif !ok {\n\t\treturn false\n\t}\n\tif m {\n\t\tc.success(to)\n\t} else {\n\t\tc.fail(c.offset)\n\t}\n\treturn true\n}\nfunc (c *context) isKeyword(from, to int) bool {\n\tol := c.offsetLimit\n\tc.offsetLimit = to\n\tdefer func() {\n\t\tc.offsetLimit = ol\n\t}()\n\tfor _, kw := range c.keywords {\n\t\tc.offset = from\n\t\tkw.parse(c)\n\t\tif c.matchLast && c.offset == to {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\nfunc (c *context) success(to int) {\n\tc.offset = to\n\tc.matchLast = true\n\tif to > c.consumed {\n\t\tc.consumed = to\n\t}\n}\nfunc (c *context) fail(offset int) {\n\tc.offset = offset\n\tc.matchLast = false\n}\nfunc findLine(tokens []rune, offset int) (line, column int) {\n\ttokens = tokens[:offset]\n\tfor i := range tokens {\n\t\tcolumn++\n\t\tif tokens[i] == '\\n' {\n\t\t\tcolumn = 0\n\t\t\tline++\n\t\t}\n\t}\n\treturn\n}\nfunc (c *context) parseError(p parser) error {\n\tdefinition := p.nodeName()\n\tflagIndex := strings.Index(definition, \":\")\n\tif flagIndex > 0 {\n\t\tdefinition = definition[:flagIndex]\n\t}\n\tif c.failingParser == nil {\n\t\tc.failOffset = c.consumed\n\t}\n\tline, col := findLine(c.tokens, c.failOffset)\n\treturn &parseError{Offset: c.failOffset, Line: line, Column: col, Definition: definition}\n}\nfunc (c *context) finalizeParse(root parser) error {\n\tfp := c.failingParser\n\tif fp == nil {\n\t\tfp = 
root\n\t}\n\tto, match, found := c.results.longestResult(0, root.nodeID())\n\tif !found || !match || found && match && to < c.readOffset {\n\t\treturn c.parseError(fp)\n\t}\n\tc.read()\n\tif c.eof {\n\t\treturn nil\n\t}\n\tif c.readErr != nil {\n\t\treturn c.readErr\n\t}\n\treturn c.parseError(root)\n}\n\ntype node struct {\n\tName\t\tstring\n\tNodes\t\t[]*node\n\tFrom, To\tint\n\ttokens\t\t[]rune\n}\n\nfunc (n *node) Tokens() []rune {\n\treturn n.tokens\n}\nfunc (n *node) String() string {\n\treturn fmt.Sprintf(\"%s:%d:%d:%s\", n.Name, n.From, n.To, n.Text())\n}\nfunc (n *node) Text() string {\n\treturn string(n.Tokens()[n.From:n.To])\n}\n\ntype commitType int\n\nconst (\n\tnone\tcommitType\t= 0\n\talias\tcommitType\t= 1 << iota\n\twhitespace\n\tnoWhitespace\n\tkeyword\n\tnoKeyword\n\tfailPass\n\troot\n\tuserDefined\n)\n\ntype formatFlags int\n\nconst (\n\tformatNone\tformatFlags\t= 0\n\tformatPretty\tformatFlags\t= 1 << iota\n\tformatIncludeComments\n)\n\ntype parseError struct {\n\tInput\t\tstring\n\tOffset\t\tint\n\tLine\t\tint\n\tColumn\t\tint\n\tDefinition\tstring\n}\ntype parser interface {\n\tnodeName() string\n\tnodeID() int\n\tcommitType() commitType\n\tparse(*context)\n}\ntype builder interface {\n\tnodeName() string\n\tnodeID() int\n\tbuild(*context) ([]*node, bool)\n}\n\nvar errInvalidUnicodeCharacter = errors.New(\"invalid unicode character\")\n\nfunc (pe *parseError) Error() string {\n\treturn fmt.Sprintf(\"%s:%d:%d:parse failed, parsing: %s\", pe.Input, pe.Line+1, pe.Column+1, pe.Definition)\n}\nfunc parseInput(r io.Reader, p parser, b builder, kw []parser) (*node, error) {\n\tc := newContext(bufio.NewReader(r), kw)\n\tp.parse(c)\n\tif c.readErr != nil {\n\t\treturn nil, c.readErr\n\t}\n\tif err := c.finalizeParse(p); err != nil {\n\t\tif perr, ok := err.(*parseError); ok {\n\t\t\tperr.Input = \"\"\n\t\t}\n\t\treturn nil, err\n\t}\n\tc.offset = 0\n\tc.results.resetPending()\n\tn, _ := b.build(c)\n\treturn n[0], nil\n}\n" +const headCode = "import 
(\n\t\"strconv\"\n\t\"errors\"\n\t\"io\"\n\t\"strings\"\n\t\"unicode\"\n\t\"fmt\"\n\t\"bufio\"\n)\n\ntype charParser struct {\n\tname\tstring\n\tid\tint\n\tnot\tbool\n\tchars\t[]rune\n\tranges\t[][]rune\n}\ntype charBuilder struct {\n\tname\tstring\n\tid\tint\n}\n\nfunc (p *charParser) nodeName() string {\n\treturn p.name\n}\nfunc (p *charParser) nodeID() int {\n\treturn p.id\n}\nfunc (p *charParser) commitType() commitType {\n\treturn alias\n}\nfunc matchChar(chars []rune, ranges [][]rune, not bool, char rune) bool {\n\tfor _, ci := range chars {\n\t\tif ci == char {\n\t\t\treturn !not\n\t\t}\n\t}\n\tfor _, ri := range ranges {\n\t\tif char >= ri[0] && char <= ri[1] {\n\t\t\treturn !not\n\t\t}\n\t}\n\treturn not\n}\nfunc (p *charParser) match(t rune) bool {\n\treturn matchChar(p.chars, p.ranges, p.not, t)\n}\nfunc (p *charParser) parse(c *context) {\n\tif tok, ok := c.token(); !ok || !p.match(tok) {\n\t\tif c.offset > c.failOffset {\n\t\t\tc.failOffset = c.offset\n\t\t\tc.failingParser = nil\n\t\t}\n\t\tc.fail(c.offset)\n\t\treturn\n\t}\n\tc.success(c.offset + 1)\n}\nfunc (b *charBuilder) nodeName() string {\n\treturn b.name\n}\nfunc (b *charBuilder) nodeID() int {\n\treturn b.id\n}\nfunc (b *charBuilder) build(c *context) ([]*node, bool) {\n\treturn nil, false\n}\n\ntype sequenceParser struct {\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tcommitType\n\titems\t\t[]parser\n\tranges\t\t[][]int\n\tgeneralizations\t[]int\n\tallChars\tbool\n}\ntype sequenceBuilder struct {\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tcommitType\n\titems\t\t[]builder\n\tranges\t\t[][]int\n\tgeneralizations\t[]int\n\tallChars\tbool\n}\n\nfunc (p *sequenceParser) nodeName() string {\n\treturn p.name\n}\nfunc (p *sequenceParser) nodeID() int {\n\treturn p.id\n}\nfunc (p *sequenceParser) commitType() commitType {\n\treturn p.commit\n}\nfunc (p *sequenceParser) parse(c *context) {\n\tif !p.allChars {\n\t\tif c.results.pending(c.offset, p.id) 
{\n\t\t\tc.fail(c.offset)\n\t\t\treturn\n\t\t}\n\t\tc.results.markPending(c.offset, p.id)\n\t}\n\tvar (\n\t\tcurrentCount\tint\n\t\tparsed\t\tbool\n\t)\n\titemIndex := 0\n\tfrom := c.offset\n\tto := c.offset\n\tfor itemIndex < len(p.items) {\n\t\tp.items[itemIndex].parse(c)\n\t\tif !c.matchLast {\n\t\t\tif currentCount >= p.ranges[itemIndex][0] {\n\t\t\t\titemIndex++\n\t\t\t\tcurrentCount = 0\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tc.offset = from\n\t\t\tif c.fromResults(p) {\n\t\t\t\tif to > c.failOffset {\n\t\t\t\t\tc.failOffset = -1\n\t\t\t\t\tc.failingParser = nil\n\t\t\t\t}\n\t\t\t\tif !p.allChars {\n\t\t\t\t\tc.results.unmarkPending(from, p.id)\n\t\t\t\t}\n\t\t\t\treturn\n\t\t\t}\n\t\t\tif c.failingParser == nil && p.commit&userDefined != 0 && p.commit&whitespace == 0 && p.commit&failPass == 0 {\n\t\t\t\tc.failingParser = p\n\t\t\t}\n\t\t\tc.fail(from)\n\t\t\tif !p.allChars {\n\t\t\t\tc.results.unmarkPending(from, p.id)\n\t\t\t}\n\t\t\treturn\n\t\t}\n\t\tparsed = c.offset > to\n\t\tif parsed {\n\t\t\tcurrentCount++\n\t\t}\n\t\tto = c.offset\n\t\tif !parsed || p.ranges[itemIndex][1] > 0 && currentCount == p.ranges[itemIndex][1] {\n\t\t\titemIndex++\n\t\t\tcurrentCount = 0\n\t\t}\n\t}\n\tif p.commit&noKeyword != 0 && c.isKeyword(from, to) {\n\t\tif c.failingParser == nil && p.commit&userDefined != 0 && p.commit&whitespace == 0 && p.commit&failPass == 0 {\n\t\t\tc.failingParser = p\n\t\t}\n\t\tc.fail(from)\n\t\tif !p.allChars {\n\t\t\tc.results.unmarkPending(from, p.id)\n\t\t}\n\t\treturn\n\t}\n\tfor _, g := range p.generalizations {\n\t\tif c.results.pending(from, g) {\n\t\t\tc.results.setMatch(from, g, to)\n\t\t}\n\t}\n\tif to > c.failOffset {\n\t\tc.failOffset = -1\n\t\tc.failingParser = nil\n\t}\n\tc.results.setMatch(from, p.id, to)\n\tc.success(to)\n\tif !p.allChars {\n\t\tc.results.unmarkPending(from, p.id)\n\t}\n}\nfunc (b *sequenceBuilder) nodeName() string {\n\treturn b.name\n}\nfunc (b *sequenceBuilder) nodeID() int {\n\treturn b.id\n}\nfunc (b 
*sequenceBuilder) build(c *context) ([]*node, bool) {\n\tto, ok := c.results.longestMatch(c.offset, b.id)\n\tif !ok {\n\t\treturn nil, false\n\t}\n\tfrom := c.offset\n\tparsed := to > from\n\tif b.allChars {\n\t\tc.offset = to\n\t\tif b.commit&alias != 0 {\n\t\t\treturn nil, true\n\t\t}\n\t\treturn []*node{{Name: b.name, From: from, To: to, tokens: c.tokens}}, true\n\t} else if parsed {\n\t\tc.results.dropMatchTo(c.offset, b.id, to)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.dropMatchTo(c.offset, g, to)\n\t\t}\n\t} else {\n\t\tif c.results.pending(c.offset, b.id) {\n\t\t\treturn nil, false\n\t\t}\n\t\tc.results.markPending(c.offset, b.id)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.markPending(c.offset, g)\n\t\t}\n\t}\n\tvar (\n\t\titemIndex\tint\n\t\tcurrentCount\tint\n\t\tnodes\t\t[]*node\n\t)\n\tfor itemIndex < len(b.items) {\n\t\titemFrom := c.offset\n\t\tn, ok := b.items[itemIndex].build(c)\n\t\tif !ok {\n\t\t\titemIndex++\n\t\t\tcurrentCount = 0\n\t\t\tcontinue\n\t\t}\n\t\tif c.offset > itemFrom {\n\t\t\tnodes = append(nodes, n...)\n\t\t\tcurrentCount++\n\t\t\tif b.ranges[itemIndex][1] > 0 && currentCount == b.ranges[itemIndex][1] {\n\t\t\t\titemIndex++\n\t\t\t\tcurrentCount = 0\n\t\t\t}\n\t\t\tcontinue\n\t\t}\n\t\tif currentCount < b.ranges[itemIndex][0] {\n\t\t\tfor i := 0; i < b.ranges[itemIndex][0]-currentCount; i++ {\n\t\t\t\tnodes = append(nodes, n...)\n\t\t\t}\n\t\t}\n\t\titemIndex++\n\t\tcurrentCount = 0\n\t}\n\tif !parsed {\n\t\tc.results.unmarkPending(from, b.id)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.unmarkPending(from, g)\n\t\t}\n\t}\n\tif b.commit&alias != 0 {\n\t\treturn nodes, true\n\t}\n\treturn []*node{{Name: b.name, From: from, To: to, Nodes: nodes, tokens: c.tokens}}, true\n}\n\ntype choiceParser struct {\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tcommitType\n\toptions\t\t[]parser\n\tgeneralizations\t[]int\n}\ntype choiceBuilder struct 
{\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tcommitType\n\toptions\t\t[]builder\n\tgeneralizations\t[]int\n}\n\nfunc (p *choiceParser) nodeName() string {\n\treturn p.name\n}\nfunc (p *choiceParser) nodeID() int {\n\treturn p.id\n}\nfunc (p *choiceParser) commitType() commitType {\n\treturn p.commit\n}\nfunc (p *choiceParser) parse(c *context) {\n\tif c.fromResults(p) {\n\t\treturn\n\t}\n\tif c.results.pending(c.offset, p.id) {\n\t\tc.fail(c.offset)\n\t\treturn\n\t}\n\tc.results.markPending(c.offset, p.id)\n\tvar (\n\t\tmatch\t\tbool\n\t\toptionIndex\tint\n\t\tfoundMatch\tbool\n\t\tfailingParser\tparser\n\t)\n\tfrom := c.offset\n\tto := c.offset\n\tinitialFailOffset := c.failOffset\n\tinitialFailingParser := c.failingParser\n\tfailOffset := initialFailOffset\n\tfor {\n\t\tfoundMatch = false\n\t\toptionIndex = 0\n\t\tfor optionIndex < len(p.options) {\n\t\t\tp.options[optionIndex].parse(c)\n\t\t\toptionIndex++\n\t\t\tif !c.matchLast {\n\t\t\t\tif c.failOffset > failOffset {\n\t\t\t\t\tfailOffset = c.failOffset\n\t\t\t\t\tfailingParser = c.failingParser\n\t\t\t\t}\n\t\t\t}\n\t\t\tif !c.matchLast || match && c.offset <= to {\n\t\t\t\tc.offset = from\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tmatch = true\n\t\t\tfoundMatch = true\n\t\t\tto = c.offset\n\t\t\tc.offset = from\n\t\t\tc.results.setMatch(from, p.id, to)\n\t\t}\n\t\tif !foundMatch {\n\t\t\tbreak\n\t\t}\n\t}\n\tif match {\n\t\tif p.commit&noKeyword != 0 && c.isKeyword(from, to) {\n\t\t\tif c.failingParser == nil && p.commit&userDefined != 0 && p.commit&whitespace == 0 && p.commit&failPass == 0 {\n\t\t\t\tc.failingParser = p\n\t\t\t}\n\t\t\tc.fail(from)\n\t\t\tc.results.unmarkPending(from, p.id)\n\t\t\treturn\n\t\t}\n\t\tif failOffset > to {\n\t\t\tc.failOffset = failOffset\n\t\t\tc.failingParser = failingParser\n\t\t} else if to > initialFailOffset {\n\t\t\tc.failOffset = -1\n\t\t\tc.failingParser = nil\n\t\t} else {\n\t\t\tc.failOffset = initialFailOffset\n\t\t\tc.failingParser = 
initialFailingParser\n\t\t}\n\t\tc.success(to)\n\t\tc.results.unmarkPending(from, p.id)\n\t\treturn\n\t}\n\tif failOffset > initialFailOffset {\n\t\tc.failOffset = failOffset\n\t\tc.failingParser = failingParser\n\t\tif c.failingParser == nil && p.commitType()&userDefined != 0 && p.commitType()&whitespace == 0 && p.commitType()&failPass == 0 {\n\t\t\tc.failingParser = p\n\t\t}\n\t}\n\tc.results.setNoMatch(from, p.id)\n\tc.fail(from)\n\tc.results.unmarkPending(from, p.id)\n}\nfunc (b *choiceBuilder) nodeName() string {\n\treturn b.name\n}\nfunc (b *choiceBuilder) nodeID() int {\n\treturn b.id\n}\nfunc (b *choiceBuilder) build(c *context) ([]*node, bool) {\n\tto, ok := c.results.longestMatch(c.offset, b.id)\n\tif !ok {\n\t\treturn nil, false\n\t}\n\tfrom := c.offset\n\tparsed := to > from\n\tif parsed {\n\t\tc.results.dropMatchTo(c.offset, b.id, to)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.dropMatchTo(c.offset, g, to)\n\t\t}\n\t} else {\n\t\tif c.results.pending(c.offset, b.id) {\n\t\t\treturn nil, false\n\t\t}\n\t\tc.results.markPending(c.offset, b.id)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.markPending(c.offset, g)\n\t\t}\n\t}\n\tvar option builder\n\tfor _, o := range b.options {\n\t\tif c.results.hasMatchTo(c.offset, o.nodeID(), to) {\n\t\t\toption = o\n\t\t\tbreak\n\t\t}\n\t}\n\tn, _ := option.build(c)\n\tif !parsed {\n\t\tc.results.unmarkPending(from, b.id)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.unmarkPending(from, g)\n\t\t}\n\t}\n\tif b.commit&alias != 0 {\n\t\treturn n, true\n\t}\n\treturn []*node{{Name: b.name, From: from, To: to, Nodes: n, tokens: c.tokens}}, true\n}\n\ntype idSet struct{ ids []uint }\n\nfunc divModBits(id int) (int, int) {\n\treturn id / strconv.IntSize, id % strconv.IntSize\n}\nfunc (s *idSet) set(id int) {\n\td, m := divModBits(id)\n\tif d >= len(s.ids) {\n\t\tif d < cap(s.ids) {\n\t\t\ts.ids = s.ids[:d+1]\n\t\t} else {\n\t\t\ts.ids = s.ids[:cap(s.ids)]\n\t\t\tfor i := 
cap(s.ids); i <= d; i++ {\n\t\t\t\ts.ids = append(s.ids, 0)\n\t\t\t}\n\t\t}\n\t}\n\ts.ids[d] |= 1 << uint(m)\n}\nfunc (s *idSet) unset(id int) {\n\td, m := divModBits(id)\n\tif d >= len(s.ids) {\n\t\treturn\n\t}\n\ts.ids[d] &^= 1 << uint(m)\n}\nfunc (s *idSet) has(id int) bool {\n\td, m := divModBits(id)\n\tif d >= len(s.ids) {\n\t\treturn false\n\t}\n\treturn s.ids[d]&(1< offset {\n\t\treturn ints\n\t}\n\tif cap(ints) > offset {\n\t\tints = ints[:offset+1]\n\t\treturn ints\n\t}\n\tints = ints[:cap(ints)]\n\tfor i := len(ints); i <= offset; i++ {\n\t\tints = append(ints, nil)\n\t}\n\treturn ints\n}\nfunc ensureOffsetIDs(ids []*idSet, offset int) []*idSet {\n\tif len(ids) > offset {\n\t\treturn ids\n\t}\n\tif cap(ids) > offset {\n\t\tids = ids[:offset+1]\n\t\treturn ids\n\t}\n\tids = ids[:cap(ids)]\n\tfor i := len(ids); i <= offset; i++ {\n\t\tids = append(ids, nil)\n\t}\n\treturn ids\n}\nfunc (r *results) setMatch(offset, id, to int) {\n\tr.match = ensureOffsetInts(r.match, offset)\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id || r.match[offset][i+1] != to {\n\t\t\tcontinue\n\t\t}\n\t\treturn\n\t}\n\tr.match[offset] = append(r.match[offset], id, to)\n}\nfunc (r *results) setNoMatch(offset, id int) {\n\tif len(r.match) > offset {\n\t\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\t\tif r.match[offset][i] != id {\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\treturn\n\t\t}\n\t}\n\tr.noMatch = ensureOffsetIDs(r.noMatch, offset)\n\tif r.noMatch[offset] == nil {\n\t\tr.noMatch[offset] = &idSet{}\n\t}\n\tr.noMatch[offset].set(id)\n}\nfunc (r *results) hasMatchTo(offset, id, to int) bool {\n\tif len(r.match) <= offset {\n\t\treturn false\n\t}\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id {\n\t\t\tcontinue\n\t\t}\n\t\tif r.match[offset][i+1] == to {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\nfunc (r *results) longestMatch(offset, id int) (int, bool) {\n\tif len(r.match) <= offset {\n\t\treturn 0, 
false\n\t}\n\tvar found bool\n\tto := -1\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id {\n\t\t\tcontinue\n\t\t}\n\t\tif r.match[offset][i+1] > to {\n\t\t\tto = r.match[offset][i+1]\n\t\t}\n\t\tfound = true\n\t}\n\treturn to, found\n}\nfunc (r *results) longestResult(offset, id int) (int, bool, bool) {\n\tif len(r.noMatch) > offset && r.noMatch[offset] != nil && r.noMatch[offset].has(id) {\n\t\treturn 0, false, true\n\t}\n\tto, ok := r.longestMatch(offset, id)\n\treturn to, ok, ok\n}\nfunc (r *results) dropMatchTo(offset, id, to int) {\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id {\n\t\t\tcontinue\n\t\t}\n\t\tif r.match[offset][i+1] == to {\n\t\t\tr.match[offset][i] = -1\n\t\t\treturn\n\t\t}\n\t}\n}\nfunc (r *results) resetPending() {\n\tr.isPending = nil\n}\nfunc (r *results) pending(offset, id int) bool {\n\tif len(r.isPending) <= id {\n\t\treturn false\n\t}\n\tfor i := range r.isPending[id] {\n\t\tif r.isPending[id][i] == offset {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\nfunc (r *results) markPending(offset, id int) {\n\tr.isPending = ensureOffsetInts(r.isPending, id)\n\tfor i := range r.isPending[id] {\n\t\tif r.isPending[id][i] == -1 {\n\t\t\tr.isPending[id][i] = offset\n\t\t\treturn\n\t\t}\n\t}\n\tr.isPending[id] = append(r.isPending[id], offset)\n}\nfunc (r *results) unmarkPending(offset, id int) {\n\tfor i := range r.isPending[id] {\n\t\tif r.isPending[id][i] == offset {\n\t\t\tr.isPending[id][i] = -1\n\t\t\tbreak\n\t\t}\n\t}\n}\n\ntype context struct {\n\treader\t\tio.RuneReader\n\tkeywords\t[]parser\n\toffset\t\tint\n\treadOffset\tint\n\tconsumed\tint\n\toffsetLimit\tint\n\tfailOffset\tint\n\tfailingParser\tparser\n\treadErr\t\terror\n\teof\t\tbool\n\tresults\t\t*results\n\ttokens\t\t[]rune\n\tmatchLast\tbool\n}\n\nfunc newContext(r io.RuneReader, keywords []parser) *context {\n\treturn &context{reader: r, keywords: keywords, results: &results{}, offsetLimit: -1, 
failOffset: -1}\n}\nfunc (c *context) read() bool {\n\tif c.eof || c.readErr != nil {\n\t\treturn false\n\t}\n\ttoken, n, err := c.reader.ReadRune()\n\tif err != nil {\n\t\tif errors.Is(err, io.EOF) {\n\t\t\tif n == 0 {\n\t\t\t\tc.eof = true\n\t\t\t\treturn false\n\t\t\t}\n\t\t} else {\n\t\t\tc.readErr = err\n\t\t\treturn false\n\t\t}\n\t}\n\tc.readOffset++\n\tif token == unicode.ReplacementChar {\n\t\tc.readErr = errInvalidUnicodeCharacter\n\t\treturn false\n\t}\n\tc.tokens = append(c.tokens, token)\n\treturn true\n}\nfunc (c *context) token() (rune, bool) {\n\tif c.offset == c.offsetLimit {\n\t\treturn 0, false\n\t}\n\tif c.offset == c.readOffset {\n\t\tif !c.read() {\n\t\t\treturn 0, false\n\t\t}\n\t}\n\treturn c.tokens[c.offset], true\n}\nfunc (c *context) fromResults(p parser) bool {\n\tto, m, ok := c.results.longestResult(c.offset, p.nodeID())\n\tif !ok {\n\t\treturn false\n\t}\n\tif m {\n\t\tc.success(to)\n\t} else {\n\t\tc.fail(c.offset)\n\t}\n\treturn true\n}\nfunc (c *context) isKeyword(from, to int) bool {\n\tol := c.offsetLimit\n\tc.offsetLimit = to\n\tdefer func() {\n\t\tc.offsetLimit = ol\n\t}()\n\tfor _, kw := range c.keywords {\n\t\tc.offset = from\n\t\tkw.parse(c)\n\t\tif c.matchLast && c.offset == to {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\nfunc (c *context) success(to int) {\n\tc.offset = to\n\tc.matchLast = true\n\tif to > c.consumed {\n\t\tc.consumed = to\n\t}\n}\nfunc (c *context) fail(offset int) {\n\tc.offset = offset\n\tc.matchLast = false\n}\nfunc findLine(tokens []rune, offset int) (line, column int) {\n\ttokens = tokens[:offset]\n\tfor i := range tokens {\n\t\tcolumn++\n\t\tif tokens[i] == '\\n' {\n\t\t\tcolumn = 0\n\t\t\tline++\n\t\t}\n\t}\n\treturn\n}\nfunc (c *context) parseError(p parser) error {\n\tdefinition := p.nodeName()\n\tflagIndex := strings.Index(definition, \":\")\n\tif flagIndex > 0 {\n\t\tdefinition = definition[:flagIndex]\n\t}\n\tif c.failingParser == nil {\n\t\tc.failOffset = c.consumed\n\t}\n\tline, col := 
findLine(c.tokens, c.failOffset)\n\treturn &parseError{Offset: c.failOffset, Line: line, Column: col, Definition: definition}\n}\nfunc (c *context) finalizeParse(root parser) error {\n\tfp := c.failingParser\n\tif fp == nil {\n\t\tfp = root\n\t}\n\tto, match, found := c.results.longestResult(0, root.nodeID())\n\tif !found || !match || found && match && to < c.readOffset {\n\t\treturn c.parseError(fp)\n\t}\n\tc.read()\n\tif c.eof {\n\t\treturn nil\n\t}\n\tif c.readErr != nil {\n\t\treturn c.readErr\n\t}\n\treturn c.parseError(root)\n}\n\ntype node struct {\n\tName\tstring\n\tNodes\t[]*node\n\tFrom\tint\n\tTo\tint\n\ttokens\t[]rune\n}\n\nfunc (n *node) Tokens() []rune {\n\treturn n.tokens\n}\nfunc (n *node) String() string {\n\treturn fmt.Sprintf(\"%s:%d:%d:%s\", n.Name, n.From, n.To, n.Text())\n}\nfunc (n *node) Text() string {\n\treturn string(n.Tokens()[n.From:n.To])\n}\n\ntype commitType int\n\nconst (\n\tnone\tcommitType\t= 0\n\talias\tcommitType\t= 1 << iota\n\twhitespace\n\tnoWhitespace\n\tkeyword\n\tnoKeyword\n\tfailPass\n\troot\n\tuserDefined\n)\n\ntype formatFlags int\n\nconst (\n\tformatNone\tformatFlags\t= 0\n\tformatPretty\tformatFlags\t= 1 << iota\n\tformatIncludeComments\n)\n\ntype parseError struct {\n\tInput\t\tstring\n\tOffset\t\tint\n\tLine\t\tint\n\tColumn\t\tint\n\tDefinition\tstring\n}\ntype parser interface {\n\tnodeName() string\n\tnodeID() int\n\tcommitType() commitType\n\tparse(*context)\n}\ntype builder interface {\n\tnodeName() string\n\tnodeID() int\n\tbuild(*context) ([]*node, bool)\n}\n\nvar errInvalidUnicodeCharacter = errors.New(\"invalid unicode character\")\n\nfunc (pe *parseError) Error() string {\n\treturn fmt.Sprintf(\"%s:%d:%d:parse failed, parsing: %s\", pe.Input, pe.Line+1, pe.Column+1, pe.Definition)\n}\nfunc parseInput(r io.Reader, p parser, b builder, kw []parser) (*node, error) {\n\tc := newContext(bufio.NewReader(r), kw)\n\tp.parse(c)\n\tif c.readErr != nil {\n\t\treturn nil, c.readErr\n\t}\n\tif err := 
c.finalizeParse(p); err != nil {\n\t\tif perr, ok := err.(*parseError); ok {\n\t\t\tperr.Input = \"\"\n\t\t}\n\t\treturn nil, err\n\t}\n\tc.offset = 0\n\tc.results.resetPending()\n\tn, _ := b.build(c)\n\treturn n[0], nil\n}\n" diff --git a/headexported.go b/headexported.go index d1c355d..b4189c0 100644 --- a/headexported.go +++ b/headexported.go @@ -1,4 +1,4 @@ package treerack // generated with scripts/createhead.go -const headCodeExported = "import (\n\t\"strconv\"\n\t\"errors\"\n\t\"io\"\n\t\"strings\"\n\t\"unicode\"\n\t\"fmt\"\n\t\"bufio\"\n)\n\ntype charParser struct {\n\tname\tstring\n\tid\tint\n\tnot\tbool\n\tchars\t[]rune\n\tranges\t[][]rune\n}\ntype charBuilder struct {\n\tname\tstring\n\tid\tint\n}\n\nfunc (p *charParser) nodeName() string {\n\treturn p.name\n}\nfunc (p *charParser) nodeID() int {\n\treturn p.id\n}\nfunc (p *charParser) commitType() CommitType {\n\treturn Alias\n}\nfunc matchChar(chars []rune, ranges [][]rune, not bool, char rune) bool {\n\tfor _, ci := range chars {\n\t\tif ci == char {\n\t\t\treturn !not\n\t\t}\n\t}\n\tfor _, ri := range ranges {\n\t\tif char >= ri[0] && char <= ri[1] {\n\t\t\treturn !not\n\t\t}\n\t}\n\treturn not\n}\nfunc (p *charParser) match(t rune) bool {\n\treturn matchChar(p.chars, p.ranges, p.not, t)\n}\nfunc (p *charParser) parse(c *context) {\n\tif tok, ok := c.token(); !ok || !p.match(tok) {\n\t\tif c.offset > c.failOffset {\n\t\t\tc.failOffset = c.offset\n\t\t\tc.failingParser = nil\n\t\t}\n\t\tc.fail(c.offset)\n\t\treturn\n\t}\n\tc.success(c.offset + 1)\n}\nfunc (b *charBuilder) nodeName() string {\n\treturn b.name\n}\nfunc (b *charBuilder) nodeID() int {\n\treturn b.id\n}\nfunc (b *charBuilder) build(c *context) ([]*Node, bool) {\n\treturn nil, false\n}\n\ntype sequenceParser struct {\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tCommitType\n\titems\t\t[]parser\n\tranges\t\t[][]int\n\tgeneralizations\t[]int\n\tallChars\tbool\n}\ntype sequenceBuilder struct 
{\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tCommitType\n\titems\t\t[]builder\n\tranges\t\t[][]int\n\tgeneralizations\t[]int\n\tallChars\tbool\n}\n\nfunc (p *sequenceParser) nodeName() string {\n\treturn p.name\n}\nfunc (p *sequenceParser) nodeID() int {\n\treturn p.id\n}\nfunc (p *sequenceParser) commitType() CommitType {\n\treturn p.commit\n}\nfunc (p *sequenceParser) parse(c *context) {\n\tif !p.allChars {\n\t\tif c.results.pending(c.offset, p.id) {\n\t\t\tc.fail(c.offset)\n\t\t\treturn\n\t\t}\n\t\tc.results.markPending(c.offset, p.id)\n\t}\n\tvar (\n\t\tcurrentCount\tint\n\t\tparsed\t\tbool\n\t)\n\titemIndex := 0\n\tfrom := c.offset\n\tto := c.offset\n\tfor itemIndex < len(p.items) {\n\t\tp.items[itemIndex].parse(c)\n\t\tif !c.matchLast {\n\t\t\tif currentCount >= p.ranges[itemIndex][0] {\n\t\t\t\titemIndex++\n\t\t\t\tcurrentCount = 0\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tc.offset = from\n\t\t\tif c.fromResults(p) {\n\t\t\t\tif to > c.failOffset {\n\t\t\t\t\tc.failOffset = -1\n\t\t\t\t\tc.failingParser = nil\n\t\t\t\t}\n\t\t\t\tif !p.allChars {\n\t\t\t\t\tc.results.unmarkPending(from, p.id)\n\t\t\t\t}\n\t\t\t\treturn\n\t\t\t}\n\t\t\tif c.failingParser == nil && p.commit&userDefined != 0 && p.commit&Whitespace == 0 && p.commit&FailPass == 0 {\n\t\t\t\tc.failingParser = p\n\t\t\t}\n\t\t\tc.fail(from)\n\t\t\tif !p.allChars {\n\t\t\t\tc.results.unmarkPending(from, p.id)\n\t\t\t}\n\t\t\treturn\n\t\t}\n\t\tparsed = c.offset > to\n\t\tif parsed {\n\t\t\tcurrentCount++\n\t\t}\n\t\tto = c.offset\n\t\tif !parsed || p.ranges[itemIndex][1] > 0 && currentCount == p.ranges[itemIndex][1] {\n\t\t\titemIndex++\n\t\t\tcurrentCount = 0\n\t\t}\n\t}\n\tif p.commit&NoKeyword != 0 && c.isKeyword(from, to) {\n\t\tif c.failingParser == nil && p.commit&userDefined != 0 && p.commit&Whitespace == 0 && p.commit&FailPass == 0 {\n\t\t\tc.failingParser = p\n\t\t}\n\t\tc.fail(from)\n\t\tif !p.allChars {\n\t\t\tc.results.unmarkPending(from, p.id)\n\t\t}\n\t\treturn\n\t}\n\tfor _, g := range 
p.generalizations {\n\t\tif c.results.pending(from, g) {\n\t\t\tc.results.setMatch(from, g, to)\n\t\t}\n\t}\n\tif to > c.failOffset {\n\t\tc.failOffset = -1\n\t\tc.failingParser = nil\n\t}\n\tc.results.setMatch(from, p.id, to)\n\tc.success(to)\n\tif !p.allChars {\n\t\tc.results.unmarkPending(from, p.id)\n\t}\n}\nfunc (b *sequenceBuilder) nodeName() string {\n\treturn b.name\n}\nfunc (b *sequenceBuilder) nodeID() int {\n\treturn b.id\n}\nfunc (b *sequenceBuilder) build(c *context) ([]*Node, bool) {\n\tto, ok := c.results.longestMatch(c.offset, b.id)\n\tif !ok {\n\t\treturn nil, false\n\t}\n\tfrom := c.offset\n\tparsed := to > from\n\tif b.allChars {\n\t\tc.offset = to\n\t\tif b.commit&Alias != 0 {\n\t\t\treturn nil, true\n\t\t}\n\t\treturn []*Node{{Name: b.name, From: from, To: to, tokens: c.tokens}}, true\n\t} else if parsed {\n\t\tc.results.dropMatchTo(c.offset, b.id, to)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.dropMatchTo(c.offset, g, to)\n\t\t}\n\t} else {\n\t\tif c.results.pending(c.offset, b.id) {\n\t\t\treturn nil, false\n\t\t}\n\t\tc.results.markPending(c.offset, b.id)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.markPending(c.offset, g)\n\t\t}\n\t}\n\tvar (\n\t\titemIndex\tint\n\t\tcurrentCount\tint\n\t\tnodes\t\t[]*Node\n\t)\n\tfor itemIndex < len(b.items) {\n\t\titemFrom := c.offset\n\t\tn, ok := b.items[itemIndex].build(c)\n\t\tif !ok {\n\t\t\titemIndex++\n\t\t\tcurrentCount = 0\n\t\t\tcontinue\n\t\t}\n\t\tif c.offset > itemFrom {\n\t\t\tnodes = append(nodes, n...)\n\t\t\tcurrentCount++\n\t\t\tif b.ranges[itemIndex][1] > 0 && currentCount == b.ranges[itemIndex][1] {\n\t\t\t\titemIndex++\n\t\t\t\tcurrentCount = 0\n\t\t\t}\n\t\t\tcontinue\n\t\t}\n\t\tif currentCount < b.ranges[itemIndex][0] {\n\t\t\tfor i := 0; i < b.ranges[itemIndex][0]-currentCount; i++ {\n\t\t\t\tnodes = append(nodes, n...)\n\t\t\t}\n\t\t}\n\t\titemIndex++\n\t\tcurrentCount = 0\n\t}\n\tif !parsed {\n\t\tc.results.unmarkPending(from, b.id)\n\t\tfor _, g 
:= range b.generalizations {\n\t\t\tc.results.unmarkPending(from, g)\n\t\t}\n\t}\n\tif b.commit&Alias != 0 {\n\t\treturn nodes, true\n\t}\n\treturn []*Node{{Name: b.name, From: from, To: to, Nodes: nodes, tokens: c.tokens}}, true\n}\n\ntype choiceParser struct {\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tCommitType\n\toptions\t\t[]parser\n\tgeneralizations\t[]int\n}\ntype choiceBuilder struct {\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tCommitType\n\toptions\t\t[]builder\n\tgeneralizations\t[]int\n}\n\nfunc (p *choiceParser) nodeName() string {\n\treturn p.name\n}\nfunc (p *choiceParser) nodeID() int {\n\treturn p.id\n}\nfunc (p *choiceParser) commitType() CommitType {\n\treturn p.commit\n}\nfunc (p *choiceParser) parse(c *context) {\n\tif c.fromResults(p) {\n\t\treturn\n\t}\n\tif c.results.pending(c.offset, p.id) {\n\t\tc.fail(c.offset)\n\t\treturn\n\t}\n\tc.results.markPending(c.offset, p.id)\n\tvar (\n\t\tmatch\t\tbool\n\t\toptionIndex\tint\n\t\tfoundMatch\tbool\n\t\tfailingParser\tparser\n\t)\n\tfrom := c.offset\n\tto := c.offset\n\tinitialFailOffset := c.failOffset\n\tinitialFailingParser := c.failingParser\n\tfailOffset := initialFailOffset\n\tfor {\n\t\tfoundMatch = false\n\t\toptionIndex = 0\n\t\tfor optionIndex < len(p.options) {\n\t\t\tp.options[optionIndex].parse(c)\n\t\t\toptionIndex++\n\t\t\tif !c.matchLast {\n\t\t\t\tif c.failOffset > failOffset {\n\t\t\t\t\tfailOffset = c.failOffset\n\t\t\t\t\tfailingParser = c.failingParser\n\t\t\t\t}\n\t\t\t}\n\t\t\tif !c.matchLast || match && c.offset <= to {\n\t\t\t\tc.offset = from\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tmatch = true\n\t\t\tfoundMatch = true\n\t\t\tto = c.offset\n\t\t\tc.offset = from\n\t\t\tc.results.setMatch(from, p.id, to)\n\t\t}\n\t\tif !foundMatch {\n\t\t\tbreak\n\t\t}\n\t}\n\tif match {\n\t\tif p.commit&NoKeyword != 0 && c.isKeyword(from, to) {\n\t\t\tif c.failingParser == nil && p.commit&userDefined != 0 && p.commit&Whitespace == 0 && p.commit&FailPass == 0 {\n\t\t\t\tc.failingParser = 
p\n\t\t\t}\n\t\t\tc.fail(from)\n\t\t\tc.results.unmarkPending(from, p.id)\n\t\t\treturn\n\t\t}\n\t\tif failOffset > to {\n\t\t\tc.failOffset = failOffset\n\t\t\tc.failingParser = failingParser\n\t\t} else if to > initialFailOffset {\n\t\t\tc.failOffset = -1\n\t\t\tc.failingParser = nil\n\t\t} else {\n\t\t\tc.failOffset = initialFailOffset\n\t\t\tc.failingParser = initialFailingParser\n\t\t}\n\t\tc.success(to)\n\t\tc.results.unmarkPending(from, p.id)\n\t\treturn\n\t}\n\tif failOffset > initialFailOffset {\n\t\tc.failOffset = failOffset\n\t\tc.failingParser = failingParser\n\t\tif c.failingParser == nil && p.commitType()&userDefined != 0 && p.commitType()&Whitespace == 0 && p.commitType()&FailPass == 0 {\n\t\t\tc.failingParser = p\n\t\t}\n\t}\n\tc.results.setNoMatch(from, p.id)\n\tc.fail(from)\n\tc.results.unmarkPending(from, p.id)\n}\nfunc (b *choiceBuilder) nodeName() string {\n\treturn b.name\n}\nfunc (b *choiceBuilder) nodeID() int {\n\treturn b.id\n}\nfunc (b *choiceBuilder) build(c *context) ([]*Node, bool) {\n\tto, ok := c.results.longestMatch(c.offset, b.id)\n\tif !ok {\n\t\treturn nil, false\n\t}\n\tfrom := c.offset\n\tparsed := to > from\n\tif parsed {\n\t\tc.results.dropMatchTo(c.offset, b.id, to)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.dropMatchTo(c.offset, g, to)\n\t\t}\n\t} else {\n\t\tif c.results.pending(c.offset, b.id) {\n\t\t\treturn nil, false\n\t\t}\n\t\tc.results.markPending(c.offset, b.id)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.markPending(c.offset, g)\n\t\t}\n\t}\n\tvar option builder\n\tfor _, o := range b.options {\n\t\tif c.results.hasMatchTo(c.offset, o.nodeID(), to) {\n\t\t\toption = o\n\t\t\tbreak\n\t\t}\n\t}\n\tn, _ := option.build(c)\n\tif !parsed {\n\t\tc.results.unmarkPending(from, b.id)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.unmarkPending(from, g)\n\t\t}\n\t}\n\tif b.commit&Alias != 0 {\n\t\treturn n, true\n\t}\n\treturn []*Node{{Name: b.name, From: from, To: to, Nodes: n, 
tokens: c.tokens}}, true\n}\n\ntype idSet struct{ ids []uint }\n\nfunc divModBits(id int) (int, int) {\n\treturn id / strconv.IntSize, id % strconv.IntSize\n}\nfunc (s *idSet) set(id int) {\n\td, m := divModBits(id)\n\tif d >= len(s.ids) {\n\t\tif d < cap(s.ids) {\n\t\t\ts.ids = s.ids[:d+1]\n\t\t} else {\n\t\t\ts.ids = s.ids[:cap(s.ids)]\n\t\t\tfor i := cap(s.ids); i <= d; i++ {\n\t\t\t\ts.ids = append(s.ids, 0)\n\t\t\t}\n\t\t}\n\t}\n\ts.ids[d] |= 1 << uint(m)\n}\nfunc (s *idSet) unset(id int) {\n\td, m := divModBits(id)\n\tif d >= len(s.ids) {\n\t\treturn\n\t}\n\ts.ids[d] &^= 1 << uint(m)\n}\nfunc (s *idSet) has(id int) bool {\n\td, m := divModBits(id)\n\tif d >= len(s.ids) {\n\t\treturn false\n\t}\n\treturn s.ids[d]&(1< offset {\n\t\treturn ints\n\t}\n\tif cap(ints) > offset {\n\t\tints = ints[:offset+1]\n\t\treturn ints\n\t}\n\tints = ints[:cap(ints)]\n\tfor i := len(ints); i <= offset; i++ {\n\t\tints = append(ints, nil)\n\t}\n\treturn ints\n}\nfunc ensureOffsetIDs(ids []*idSet, offset int) []*idSet {\n\tif len(ids) > offset {\n\t\treturn ids\n\t}\n\tif cap(ids) > offset {\n\t\tids = ids[:offset+1]\n\t\treturn ids\n\t}\n\tids = ids[:cap(ids)]\n\tfor i := len(ids); i <= offset; i++ {\n\t\tids = append(ids, nil)\n\t}\n\treturn ids\n}\nfunc (r *results) setMatch(offset, id, to int) {\n\tr.match = ensureOffsetInts(r.match, offset)\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id || r.match[offset][i+1] != to {\n\t\t\tcontinue\n\t\t}\n\t\treturn\n\t}\n\tr.match[offset] = append(r.match[offset], id, to)\n}\nfunc (r *results) setNoMatch(offset, id int) {\n\tif len(r.match) > offset {\n\t\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\t\tif r.match[offset][i] != id {\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\treturn\n\t\t}\n\t}\n\tr.noMatch = ensureOffsetIDs(r.noMatch, offset)\n\tif r.noMatch[offset] == nil {\n\t\tr.noMatch[offset] = &idSet{}\n\t}\n\tr.noMatch[offset].set(id)\n}\nfunc (r *results) hasMatchTo(offset, id, to int) bool 
{\n\tif len(r.match) <= offset {\n\t\treturn false\n\t}\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id {\n\t\t\tcontinue\n\t\t}\n\t\tif r.match[offset][i+1] == to {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\nfunc (r *results) longestMatch(offset, id int) (int, bool) {\n\tif len(r.match) <= offset {\n\t\treturn 0, false\n\t}\n\tvar found bool\n\tto := -1\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id {\n\t\t\tcontinue\n\t\t}\n\t\tif r.match[offset][i+1] > to {\n\t\t\tto = r.match[offset][i+1]\n\t\t}\n\t\tfound = true\n\t}\n\treturn to, found\n}\nfunc (r *results) longestResult(offset, id int) (int, bool, bool) {\n\tif len(r.noMatch) > offset && r.noMatch[offset] != nil && r.noMatch[offset].has(id) {\n\t\treturn 0, false, true\n\t}\n\tto, ok := r.longestMatch(offset, id)\n\treturn to, ok, ok\n}\nfunc (r *results) dropMatchTo(offset, id, to int) {\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id {\n\t\t\tcontinue\n\t\t}\n\t\tif r.match[offset][i+1] == to {\n\t\t\tr.match[offset][i] = -1\n\t\t\treturn\n\t\t}\n\t}\n}\nfunc (r *results) resetPending() {\n\tr.isPending = nil\n}\nfunc (r *results) pending(offset, id int) bool {\n\tif len(r.isPending) <= id {\n\t\treturn false\n\t}\n\tfor i := range r.isPending[id] {\n\t\tif r.isPending[id][i] == offset {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\nfunc (r *results) markPending(offset, id int) {\n\tr.isPending = ensureOffsetInts(r.isPending, id)\n\tfor i := range r.isPending[id] {\n\t\tif r.isPending[id][i] == -1 {\n\t\t\tr.isPending[id][i] = offset\n\t\t\treturn\n\t\t}\n\t}\n\tr.isPending[id] = append(r.isPending[id], offset)\n}\nfunc (r *results) unmarkPending(offset, id int) {\n\tfor i := range r.isPending[id] {\n\t\tif r.isPending[id][i] == offset {\n\t\t\tr.isPending[id][i] = -1\n\t\t\tbreak\n\t\t}\n\t}\n}\n\ntype context struct 
{\n\treader\t\tio.RuneReader\n\tkeywords\t[]parser\n\toffset\t\tint\n\treadOffset\tint\n\tconsumed\tint\n\toffsetLimit\tint\n\tfailOffset\tint\n\tfailingParser\tparser\n\treadErr\t\terror\n\teof\t\tbool\n\tresults\t\t*results\n\ttokens\t\t[]rune\n\tmatchLast\tbool\n}\n\nfunc newContext(r io.RuneReader, keywords []parser) *context {\n\treturn &context{reader: r, keywords: keywords, results: &results{}, offsetLimit: -1, failOffset: -1}\n}\nfunc (c *context) read() bool {\n\tif c.eof || c.readErr != nil {\n\t\treturn false\n\t}\n\ttoken, n, err := c.reader.ReadRune()\n\tif err != nil {\n\t\tif errors.Is(err, io.EOF) {\n\t\t\tif n == 0 {\n\t\t\t\tc.eof = true\n\t\t\t\treturn false\n\t\t\t}\n\t\t} else {\n\t\t\tc.readErr = err\n\t\t\treturn false\n\t\t}\n\t}\n\tc.readOffset++\n\tif token == unicode.ReplacementChar {\n\t\tc.readErr = ErrInvalidUnicodeCharacter\n\t\treturn false\n\t}\n\tc.tokens = append(c.tokens, token)\n\treturn true\n}\nfunc (c *context) token() (rune, bool) {\n\tif c.offset == c.offsetLimit {\n\t\treturn 0, false\n\t}\n\tif c.offset == c.readOffset {\n\t\tif !c.read() {\n\t\t\treturn 0, false\n\t\t}\n\t}\n\treturn c.tokens[c.offset], true\n}\nfunc (c *context) fromResults(p parser) bool {\n\tto, m, ok := c.results.longestResult(c.offset, p.nodeID())\n\tif !ok {\n\t\treturn false\n\t}\n\tif m {\n\t\tc.success(to)\n\t} else {\n\t\tc.fail(c.offset)\n\t}\n\treturn true\n}\nfunc (c *context) isKeyword(from, to int) bool {\n\tol := c.offsetLimit\n\tc.offsetLimit = to\n\tdefer func() {\n\t\tc.offsetLimit = ol\n\t}()\n\tfor _, kw := range c.keywords {\n\t\tc.offset = from\n\t\tkw.parse(c)\n\t\tif c.matchLast && c.offset == to {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\nfunc (c *context) success(to int) {\n\tc.offset = to\n\tc.matchLast = true\n\tif to > c.consumed {\n\t\tc.consumed = to\n\t}\n}\nfunc (c *context) fail(offset int) {\n\tc.offset = offset\n\tc.matchLast = false\n}\nfunc findLine(tokens []rune, offset int) (line, column int) {\n\ttokens 
= tokens[:offset]\n\tfor i := range tokens {\n\t\tcolumn++\n\t\tif tokens[i] == '\\n' {\n\t\t\tcolumn = 0\n\t\t\tline++\n\t\t}\n\t}\n\treturn\n}\nfunc (c *context) parseError(p parser) error {\n\tdefinition := p.nodeName()\n\tflagIndex := strings.Index(definition, \":\")\n\tif flagIndex > 0 {\n\t\tdefinition = definition[:flagIndex]\n\t}\n\tif c.failingParser == nil {\n\t\tc.failOffset = c.consumed\n\t}\n\tline, col := findLine(c.tokens, c.failOffset)\n\treturn &ParseError{Offset: c.failOffset, Line: line, Column: col, Definition: definition}\n}\nfunc (c *context) finalizeParse(root parser) error {\n\tfp := c.failingParser\n\tif fp == nil {\n\t\tfp = root\n\t}\n\tto, match, found := c.results.longestResult(0, root.nodeID())\n\tif !found || !match || found && match && to < c.readOffset {\n\t\treturn c.parseError(fp)\n\t}\n\tc.read()\n\tif c.eof {\n\t\treturn nil\n\t}\n\tif c.readErr != nil {\n\t\treturn c.readErr\n\t}\n\treturn c.parseError(root)\n}\n\ntype Node struct {\n\tName\t\tstring\n\tNodes\t\t[]*Node\n\tFrom, To\tint\n\ttokens\t\t[]rune\n}\n\nfunc (n *Node) Tokens() []rune {\n\treturn n.tokens\n}\nfunc (n *Node) String() string {\n\treturn fmt.Sprintf(\"%s:%d:%d:%s\", n.Name, n.From, n.To, n.Text())\n}\nfunc (n *Node) Text() string {\n\treturn string(n.Tokens()[n.From:n.To])\n}\n\ntype CommitType int\n\nconst (\n\tNone\tCommitType\t= 0\n\tAlias\tCommitType\t= 1 << iota\n\tWhitespace\n\tNoWhitespace\n\tKeyword\n\tNoKeyword\n\tFailPass\n\tRoot\n\tuserDefined\n)\n\ntype formatFlags int\n\nconst (\n\tformatNone\tformatFlags\t= 0\n\tformatPretty\tformatFlags\t= 1 << iota\n\tformatIncludeComments\n)\n\ntype ParseError struct {\n\tInput\t\tstring\n\tOffset\t\tint\n\tLine\t\tint\n\tColumn\t\tint\n\tDefinition\tstring\n}\ntype parser interface {\n\tnodeName() string\n\tnodeID() int\n\tcommitType() CommitType\n\tparse(*context)\n}\ntype builder interface {\n\tnodeName() string\n\tnodeID() int\n\tbuild(*context) ([]*Node, bool)\n}\n\nvar ErrInvalidUnicodeCharacter = 
errors.New(\"invalid unicode character\")\n\nfunc (pe *ParseError) Error() string {\n\treturn fmt.Sprintf(\"%s:%d:%d:parse failed, parsing: %s\", pe.Input, pe.Line+1, pe.Column+1, pe.Definition)\n}\nfunc parseInput(r io.Reader, p parser, b builder, kw []parser) (*Node, error) {\n\tc := newContext(bufio.NewReader(r), kw)\n\tp.parse(c)\n\tif c.readErr != nil {\n\t\treturn nil, c.readErr\n\t}\n\tif err := c.finalizeParse(p); err != nil {\n\t\tif perr, ok := err.(*ParseError); ok {\n\t\t\tperr.Input = \"\"\n\t\t}\n\t\treturn nil, err\n\t}\n\tc.offset = 0\n\tc.results.resetPending()\n\tn, _ := b.build(c)\n\treturn n[0], nil\n}\n" +const headCodeExported = "import (\n\t\"strconv\"\n\t\"errors\"\n\t\"io\"\n\t\"strings\"\n\t\"unicode\"\n\t\"fmt\"\n\t\"bufio\"\n)\n\ntype charParser struct {\n\tname\tstring\n\tid\tint\n\tnot\tbool\n\tchars\t[]rune\n\tranges\t[][]rune\n}\ntype charBuilder struct {\n\tname\tstring\n\tid\tint\n}\n\nfunc (p *charParser) nodeName() string {\n\treturn p.name\n}\nfunc (p *charParser) nodeID() int {\n\treturn p.id\n}\nfunc (p *charParser) commitType() CommitType {\n\treturn Alias\n}\nfunc matchChar(chars []rune, ranges [][]rune, not bool, char rune) bool {\n\tfor _, ci := range chars {\n\t\tif ci == char {\n\t\t\treturn !not\n\t\t}\n\t}\n\tfor _, ri := range ranges {\n\t\tif char >= ri[0] && char <= ri[1] {\n\t\t\treturn !not\n\t\t}\n\t}\n\treturn not\n}\nfunc (p *charParser) match(t rune) bool {\n\treturn matchChar(p.chars, p.ranges, p.not, t)\n}\nfunc (p *charParser) parse(c *context) {\n\tif tok, ok := c.token(); !ok || !p.match(tok) {\n\t\tif c.offset > c.failOffset {\n\t\t\tc.failOffset = c.offset\n\t\t\tc.failingParser = nil\n\t\t}\n\t\tc.fail(c.offset)\n\t\treturn\n\t}\n\tc.success(c.offset + 1)\n}\nfunc (b *charBuilder) nodeName() string {\n\treturn b.name\n}\nfunc (b *charBuilder) nodeID() int {\n\treturn b.id\n}\nfunc (b *charBuilder) build(c *context) ([]*Node, bool) {\n\treturn nil, false\n}\n\ntype sequenceParser struct 
{\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tCommitType\n\titems\t\t[]parser\n\tranges\t\t[][]int\n\tgeneralizations\t[]int\n\tallChars\tbool\n}\ntype sequenceBuilder struct {\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tCommitType\n\titems\t\t[]builder\n\tranges\t\t[][]int\n\tgeneralizations\t[]int\n\tallChars\tbool\n}\n\nfunc (p *sequenceParser) nodeName() string {\n\treturn p.name\n}\nfunc (p *sequenceParser) nodeID() int {\n\treturn p.id\n}\nfunc (p *sequenceParser) commitType() CommitType {\n\treturn p.commit\n}\nfunc (p *sequenceParser) parse(c *context) {\n\tif !p.allChars {\n\t\tif c.results.pending(c.offset, p.id) {\n\t\t\tc.fail(c.offset)\n\t\t\treturn\n\t\t}\n\t\tc.results.markPending(c.offset, p.id)\n\t}\n\tvar (\n\t\tcurrentCount\tint\n\t\tparsed\t\tbool\n\t)\n\titemIndex := 0\n\tfrom := c.offset\n\tto := c.offset\n\tfor itemIndex < len(p.items) {\n\t\tp.items[itemIndex].parse(c)\n\t\tif !c.matchLast {\n\t\t\tif currentCount >= p.ranges[itemIndex][0] {\n\t\t\t\titemIndex++\n\t\t\t\tcurrentCount = 0\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tc.offset = from\n\t\t\tif c.fromResults(p) {\n\t\t\t\tif to > c.failOffset {\n\t\t\t\t\tc.failOffset = -1\n\t\t\t\t\tc.failingParser = nil\n\t\t\t\t}\n\t\t\t\tif !p.allChars {\n\t\t\t\t\tc.results.unmarkPending(from, p.id)\n\t\t\t\t}\n\t\t\t\treturn\n\t\t\t}\n\t\t\tif c.failingParser == nil && p.commit&userDefined != 0 && p.commit&Whitespace == 0 && p.commit&FailPass == 0 {\n\t\t\t\tc.failingParser = p\n\t\t\t}\n\t\t\tc.fail(from)\n\t\t\tif !p.allChars {\n\t\t\t\tc.results.unmarkPending(from, p.id)\n\t\t\t}\n\t\t\treturn\n\t\t}\n\t\tparsed = c.offset > to\n\t\tif parsed {\n\t\t\tcurrentCount++\n\t\t}\n\t\tto = c.offset\n\t\tif !parsed || p.ranges[itemIndex][1] > 0 && currentCount == p.ranges[itemIndex][1] {\n\t\t\titemIndex++\n\t\t\tcurrentCount = 0\n\t\t}\n\t}\n\tif p.commit&NoKeyword != 0 && c.isKeyword(from, to) {\n\t\tif c.failingParser == nil && p.commit&userDefined != 0 && p.commit&Whitespace == 0 && p.commit&FailPass 
== 0 {\n\t\t\tc.failingParser = p\n\t\t}\n\t\tc.fail(from)\n\t\tif !p.allChars {\n\t\t\tc.results.unmarkPending(from, p.id)\n\t\t}\n\t\treturn\n\t}\n\tfor _, g := range p.generalizations {\n\t\tif c.results.pending(from, g) {\n\t\t\tc.results.setMatch(from, g, to)\n\t\t}\n\t}\n\tif to > c.failOffset {\n\t\tc.failOffset = -1\n\t\tc.failingParser = nil\n\t}\n\tc.results.setMatch(from, p.id, to)\n\tc.success(to)\n\tif !p.allChars {\n\t\tc.results.unmarkPending(from, p.id)\n\t}\n}\nfunc (b *sequenceBuilder) nodeName() string {\n\treturn b.name\n}\nfunc (b *sequenceBuilder) nodeID() int {\n\treturn b.id\n}\nfunc (b *sequenceBuilder) build(c *context) ([]*Node, bool) {\n\tto, ok := c.results.longestMatch(c.offset, b.id)\n\tif !ok {\n\t\treturn nil, false\n\t}\n\tfrom := c.offset\n\tparsed := to > from\n\tif b.allChars {\n\t\tc.offset = to\n\t\tif b.commit&Alias != 0 {\n\t\t\treturn nil, true\n\t\t}\n\t\treturn []*Node{{Name: b.name, From: from, To: to, tokens: c.tokens}}, true\n\t} else if parsed {\n\t\tc.results.dropMatchTo(c.offset, b.id, to)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.dropMatchTo(c.offset, g, to)\n\t\t}\n\t} else {\n\t\tif c.results.pending(c.offset, b.id) {\n\t\t\treturn nil, false\n\t\t}\n\t\tc.results.markPending(c.offset, b.id)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.markPending(c.offset, g)\n\t\t}\n\t}\n\tvar (\n\t\titemIndex\tint\n\t\tcurrentCount\tint\n\t\tnodes\t\t[]*Node\n\t)\n\tfor itemIndex < len(b.items) {\n\t\titemFrom := c.offset\n\t\tn, ok := b.items[itemIndex].build(c)\n\t\tif !ok {\n\t\t\titemIndex++\n\t\t\tcurrentCount = 0\n\t\t\tcontinue\n\t\t}\n\t\tif c.offset > itemFrom {\n\t\t\tnodes = append(nodes, n...)\n\t\t\tcurrentCount++\n\t\t\tif b.ranges[itemIndex][1] > 0 && currentCount == b.ranges[itemIndex][1] {\n\t\t\t\titemIndex++\n\t\t\t\tcurrentCount = 0\n\t\t\t}\n\t\t\tcontinue\n\t\t}\n\t\tif currentCount < b.ranges[itemIndex][0] {\n\t\t\tfor i := 0; i < b.ranges[itemIndex][0]-currentCount; i++ 
{\n\t\t\t\tnodes = append(nodes, n...)\n\t\t\t}\n\t\t}\n\t\titemIndex++\n\t\tcurrentCount = 0\n\t}\n\tif !parsed {\n\t\tc.results.unmarkPending(from, b.id)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.unmarkPending(from, g)\n\t\t}\n\t}\n\tif b.commit&Alias != 0 {\n\t\treturn nodes, true\n\t}\n\treturn []*Node{{Name: b.name, From: from, To: to, Nodes: nodes, tokens: c.tokens}}, true\n}\n\ntype choiceParser struct {\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tCommitType\n\toptions\t\t[]parser\n\tgeneralizations\t[]int\n}\ntype choiceBuilder struct {\n\tname\t\tstring\n\tid\t\tint\n\tcommit\t\tCommitType\n\toptions\t\t[]builder\n\tgeneralizations\t[]int\n}\n\nfunc (p *choiceParser) nodeName() string {\n\treturn p.name\n}\nfunc (p *choiceParser) nodeID() int {\n\treturn p.id\n}\nfunc (p *choiceParser) commitType() CommitType {\n\treturn p.commit\n}\nfunc (p *choiceParser) parse(c *context) {\n\tif c.fromResults(p) {\n\t\treturn\n\t}\n\tif c.results.pending(c.offset, p.id) {\n\t\tc.fail(c.offset)\n\t\treturn\n\t}\n\tc.results.markPending(c.offset, p.id)\n\tvar (\n\t\tmatch\t\tbool\n\t\toptionIndex\tint\n\t\tfoundMatch\tbool\n\t\tfailingParser\tparser\n\t)\n\tfrom := c.offset\n\tto := c.offset\n\tinitialFailOffset := c.failOffset\n\tinitialFailingParser := c.failingParser\n\tfailOffset := initialFailOffset\n\tfor {\n\t\tfoundMatch = false\n\t\toptionIndex = 0\n\t\tfor optionIndex < len(p.options) {\n\t\t\tp.options[optionIndex].parse(c)\n\t\t\toptionIndex++\n\t\t\tif !c.matchLast {\n\t\t\t\tif c.failOffset > failOffset {\n\t\t\t\t\tfailOffset = c.failOffset\n\t\t\t\t\tfailingParser = c.failingParser\n\t\t\t\t}\n\t\t\t}\n\t\t\tif !c.matchLast || match && c.offset <= to {\n\t\t\t\tc.offset = from\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tmatch = true\n\t\t\tfoundMatch = true\n\t\t\tto = c.offset\n\t\t\tc.offset = from\n\t\t\tc.results.setMatch(from, p.id, to)\n\t\t}\n\t\tif !foundMatch {\n\t\t\tbreak\n\t\t}\n\t}\n\tif match {\n\t\tif p.commit&NoKeyword != 0 && 
c.isKeyword(from, to) {\n\t\t\tif c.failingParser == nil && p.commit&userDefined != 0 && p.commit&Whitespace == 0 && p.commit&FailPass == 0 {\n\t\t\t\tc.failingParser = p\n\t\t\t}\n\t\t\tc.fail(from)\n\t\t\tc.results.unmarkPending(from, p.id)\n\t\t\treturn\n\t\t}\n\t\tif failOffset > to {\n\t\t\tc.failOffset = failOffset\n\t\t\tc.failingParser = failingParser\n\t\t} else if to > initialFailOffset {\n\t\t\tc.failOffset = -1\n\t\t\tc.failingParser = nil\n\t\t} else {\n\t\t\tc.failOffset = initialFailOffset\n\t\t\tc.failingParser = initialFailingParser\n\t\t}\n\t\tc.success(to)\n\t\tc.results.unmarkPending(from, p.id)\n\t\treturn\n\t}\n\tif failOffset > initialFailOffset {\n\t\tc.failOffset = failOffset\n\t\tc.failingParser = failingParser\n\t\tif c.failingParser == nil && p.commitType()&userDefined != 0 && p.commitType()&Whitespace == 0 && p.commitType()&FailPass == 0 {\n\t\t\tc.failingParser = p\n\t\t}\n\t}\n\tc.results.setNoMatch(from, p.id)\n\tc.fail(from)\n\tc.results.unmarkPending(from, p.id)\n}\nfunc (b *choiceBuilder) nodeName() string {\n\treturn b.name\n}\nfunc (b *choiceBuilder) nodeID() int {\n\treturn b.id\n}\nfunc (b *choiceBuilder) build(c *context) ([]*Node, bool) {\n\tto, ok := c.results.longestMatch(c.offset, b.id)\n\tif !ok {\n\t\treturn nil, false\n\t}\n\tfrom := c.offset\n\tparsed := to > from\n\tif parsed {\n\t\tc.results.dropMatchTo(c.offset, b.id, to)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.dropMatchTo(c.offset, g, to)\n\t\t}\n\t} else {\n\t\tif c.results.pending(c.offset, b.id) {\n\t\t\treturn nil, false\n\t\t}\n\t\tc.results.markPending(c.offset, b.id)\n\t\tfor _, g := range b.generalizations {\n\t\t\tc.results.markPending(c.offset, g)\n\t\t}\n\t}\n\tvar option builder\n\tfor _, o := range b.options {\n\t\tif c.results.hasMatchTo(c.offset, o.nodeID(), to) {\n\t\t\toption = o\n\t\t\tbreak\n\t\t}\n\t}\n\tn, _ := option.build(c)\n\tif !parsed {\n\t\tc.results.unmarkPending(from, b.id)\n\t\tfor _, g := range b.generalizations 
{\n\t\t\tc.results.unmarkPending(from, g)\n\t\t}\n\t}\n\tif b.commit&Alias != 0 {\n\t\treturn n, true\n\t}\n\treturn []*Node{{Name: b.name, From: from, To: to, Nodes: n, tokens: c.tokens}}, true\n}\n\ntype idSet struct{ ids []uint }\n\nfunc divModBits(id int) (int, int) {\n\treturn id / strconv.IntSize, id % strconv.IntSize\n}\nfunc (s *idSet) set(id int) {\n\td, m := divModBits(id)\n\tif d >= len(s.ids) {\n\t\tif d < cap(s.ids) {\n\t\t\ts.ids = s.ids[:d+1]\n\t\t} else {\n\t\t\ts.ids = s.ids[:cap(s.ids)]\n\t\t\tfor i := cap(s.ids); i <= d; i++ {\n\t\t\t\ts.ids = append(s.ids, 0)\n\t\t\t}\n\t\t}\n\t}\n\ts.ids[d] |= 1 << uint(m)\n}\nfunc (s *idSet) unset(id int) {\n\td, m := divModBits(id)\n\tif d >= len(s.ids) {\n\t\treturn\n\t}\n\ts.ids[d] &^= 1 << uint(m)\n}\nfunc (s *idSet) has(id int) bool {\n\td, m := divModBits(id)\n\tif d >= len(s.ids) {\n\t\treturn false\n\t}\n\treturn s.ids[d]&(1< offset {\n\t\treturn ints\n\t}\n\tif cap(ints) > offset {\n\t\tints = ints[:offset+1]\n\t\treturn ints\n\t}\n\tints = ints[:cap(ints)]\n\tfor i := len(ints); i <= offset; i++ {\n\t\tints = append(ints, nil)\n\t}\n\treturn ints\n}\nfunc ensureOffsetIDs(ids []*idSet, offset int) []*idSet {\n\tif len(ids) > offset {\n\t\treturn ids\n\t}\n\tif cap(ids) > offset {\n\t\tids = ids[:offset+1]\n\t\treturn ids\n\t}\n\tids = ids[:cap(ids)]\n\tfor i := len(ids); i <= offset; i++ {\n\t\tids = append(ids, nil)\n\t}\n\treturn ids\n}\nfunc (r *results) setMatch(offset, id, to int) {\n\tr.match = ensureOffsetInts(r.match, offset)\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id || r.match[offset][i+1] != to {\n\t\t\tcontinue\n\t\t}\n\t\treturn\n\t}\n\tr.match[offset] = append(r.match[offset], id, to)\n}\nfunc (r *results) setNoMatch(offset, id int) {\n\tif len(r.match) > offset {\n\t\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\t\tif r.match[offset][i] != id {\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\treturn\n\t\t}\n\t}\n\tr.noMatch = ensureOffsetIDs(r.noMatch, 
offset)\n\tif r.noMatch[offset] == nil {\n\t\tr.noMatch[offset] = &idSet{}\n\t}\n\tr.noMatch[offset].set(id)\n}\nfunc (r *results) hasMatchTo(offset, id, to int) bool {\n\tif len(r.match) <= offset {\n\t\treturn false\n\t}\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id {\n\t\t\tcontinue\n\t\t}\n\t\tif r.match[offset][i+1] == to {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\nfunc (r *results) longestMatch(offset, id int) (int, bool) {\n\tif len(r.match) <= offset {\n\t\treturn 0, false\n\t}\n\tvar found bool\n\tto := -1\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id {\n\t\t\tcontinue\n\t\t}\n\t\tif r.match[offset][i+1] > to {\n\t\t\tto = r.match[offset][i+1]\n\t\t}\n\t\tfound = true\n\t}\n\treturn to, found\n}\nfunc (r *results) longestResult(offset, id int) (int, bool, bool) {\n\tif len(r.noMatch) > offset && r.noMatch[offset] != nil && r.noMatch[offset].has(id) {\n\t\treturn 0, false, true\n\t}\n\tto, ok := r.longestMatch(offset, id)\n\treturn to, ok, ok\n}\nfunc (r *results) dropMatchTo(offset, id, to int) {\n\tfor i := 0; i < len(r.match[offset]); i += 2 {\n\t\tif r.match[offset][i] != id {\n\t\t\tcontinue\n\t\t}\n\t\tif r.match[offset][i+1] == to {\n\t\t\tr.match[offset][i] = -1\n\t\t\treturn\n\t\t}\n\t}\n}\nfunc (r *results) resetPending() {\n\tr.isPending = nil\n}\nfunc (r *results) pending(offset, id int) bool {\n\tif len(r.isPending) <= id {\n\t\treturn false\n\t}\n\tfor i := range r.isPending[id] {\n\t\tif r.isPending[id][i] == offset {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\nfunc (r *results) markPending(offset, id int) {\n\tr.isPending = ensureOffsetInts(r.isPending, id)\n\tfor i := range r.isPending[id] {\n\t\tif r.isPending[id][i] == -1 {\n\t\t\tr.isPending[id][i] = offset\n\t\t\treturn\n\t\t}\n\t}\n\tr.isPending[id] = append(r.isPending[id], offset)\n}\nfunc (r *results) unmarkPending(offset, id int) {\n\tfor i := range r.isPending[id] {\n\t\tif r.isPending[id][i] 
== offset {\n\t\t\tr.isPending[id][i] = -1\n\t\t\tbreak\n\t\t}\n\t}\n}\n\ntype context struct {\n\treader\t\tio.RuneReader\n\tkeywords\t[]parser\n\toffset\t\tint\n\treadOffset\tint\n\tconsumed\tint\n\toffsetLimit\tint\n\tfailOffset\tint\n\tfailingParser\tparser\n\treadErr\t\terror\n\teof\t\tbool\n\tresults\t\t*results\n\ttokens\t\t[]rune\n\tmatchLast\tbool\n}\n\nfunc newContext(r io.RuneReader, keywords []parser) *context {\n\treturn &context{reader: r, keywords: keywords, results: &results{}, offsetLimit: -1, failOffset: -1}\n}\nfunc (c *context) read() bool {\n\tif c.eof || c.readErr != nil {\n\t\treturn false\n\t}\n\ttoken, n, err := c.reader.ReadRune()\n\tif err != nil {\n\t\tif errors.Is(err, io.EOF) {\n\t\t\tif n == 0 {\n\t\t\t\tc.eof = true\n\t\t\t\treturn false\n\t\t\t}\n\t\t} else {\n\t\t\tc.readErr = err\n\t\t\treturn false\n\t\t}\n\t}\n\tc.readOffset++\n\tif token == unicode.ReplacementChar {\n\t\tc.readErr = ErrInvalidUnicodeCharacter\n\t\treturn false\n\t}\n\tc.tokens = append(c.tokens, token)\n\treturn true\n}\nfunc (c *context) token() (rune, bool) {\n\tif c.offset == c.offsetLimit {\n\t\treturn 0, false\n\t}\n\tif c.offset == c.readOffset {\n\t\tif !c.read() {\n\t\t\treturn 0, false\n\t\t}\n\t}\n\treturn c.tokens[c.offset], true\n}\nfunc (c *context) fromResults(p parser) bool {\n\tto, m, ok := c.results.longestResult(c.offset, p.nodeID())\n\tif !ok {\n\t\treturn false\n\t}\n\tif m {\n\t\tc.success(to)\n\t} else {\n\t\tc.fail(c.offset)\n\t}\n\treturn true\n}\nfunc (c *context) isKeyword(from, to int) bool {\n\tol := c.offsetLimit\n\tc.offsetLimit = to\n\tdefer func() {\n\t\tc.offsetLimit = ol\n\t}()\n\tfor _, kw := range c.keywords {\n\t\tc.offset = from\n\t\tkw.parse(c)\n\t\tif c.matchLast && c.offset == to {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\nfunc (c *context) success(to int) {\n\tc.offset = to\n\tc.matchLast = true\n\tif to > c.consumed {\n\t\tc.consumed = to\n\t}\n}\nfunc (c *context) fail(offset int) {\n\tc.offset = 
offset\n\tc.matchLast = false\n}\nfunc findLine(tokens []rune, offset int) (line, column int) {\n\ttokens = tokens[:offset]\n\tfor i := range tokens {\n\t\tcolumn++\n\t\tif tokens[i] == '\\n' {\n\t\t\tcolumn = 0\n\t\t\tline++\n\t\t}\n\t}\n\treturn\n}\nfunc (c *context) parseError(p parser) error {\n\tdefinition := p.nodeName()\n\tflagIndex := strings.Index(definition, \":\")\n\tif flagIndex > 0 {\n\t\tdefinition = definition[:flagIndex]\n\t}\n\tif c.failingParser == nil {\n\t\tc.failOffset = c.consumed\n\t}\n\tline, col := findLine(c.tokens, c.failOffset)\n\treturn &ParseError{Offset: c.failOffset, Line: line, Column: col, Definition: definition}\n}\nfunc (c *context) finalizeParse(root parser) error {\n\tfp := c.failingParser\n\tif fp == nil {\n\t\tfp = root\n\t}\n\tto, match, found := c.results.longestResult(0, root.nodeID())\n\tif !found || !match || found && match && to < c.readOffset {\n\t\treturn c.parseError(fp)\n\t}\n\tc.read()\n\tif c.eof {\n\t\treturn nil\n\t}\n\tif c.readErr != nil {\n\t\treturn c.readErr\n\t}\n\treturn c.parseError(root)\n}\n\ntype Node struct {\n\tName\tstring\n\tNodes\t[]*Node\n\tFrom\tint\n\tTo\tint\n\ttokens\t[]rune\n}\n\nfunc (n *Node) Tokens() []rune {\n\treturn n.tokens\n}\nfunc (n *Node) String() string {\n\treturn fmt.Sprintf(\"%s:%d:%d:%s\", n.Name, n.From, n.To, n.Text())\n}\nfunc (n *Node) Text() string {\n\treturn string(n.Tokens()[n.From:n.To])\n}\n\ntype CommitType int\n\nconst (\n\tNone\tCommitType\t= 0\n\tAlias\tCommitType\t= 1 << iota\n\tWhitespace\n\tNoWhitespace\n\tKeyword\n\tNoKeyword\n\tFailPass\n\tRoot\n\tuserDefined\n)\n\ntype formatFlags int\n\nconst (\n\tformatNone\tformatFlags\t= 0\n\tformatPretty\tformatFlags\t= 1 << iota\n\tformatIncludeComments\n)\n\ntype ParseError struct {\n\tInput\t\tstring\n\tOffset\t\tint\n\tLine\t\tint\n\tColumn\t\tint\n\tDefinition\tstring\n}\ntype parser interface {\n\tnodeName() string\n\tnodeID() int\n\tcommitType() CommitType\n\tparse(*context)\n}\ntype builder interface 
{\n\tnodeName() string\n\tnodeID() int\n\tbuild(*context) ([]*Node, bool)\n}\n\nvar ErrInvalidUnicodeCharacter = errors.New(\"invalid unicode character\")\n\nfunc (pe *ParseError) Error() string {\n\treturn fmt.Sprintf(\"%s:%d:%d:parse failed, parsing: %s\", pe.Input, pe.Line+1, pe.Column+1, pe.Definition)\n}\nfunc parseInput(r io.Reader, p parser, b builder, kw []parser) (*Node, error) {\n\tc := newContext(bufio.NewReader(r), kw)\n\tp.parse(c)\n\tif c.readErr != nil {\n\t\treturn nil, c.readErr\n\t}\n\tif err := c.finalizeParse(p); err != nil {\n\t\tif perr, ok := err.(*ParseError); ok {\n\t\t\tperr.Input = \"\"\n\t\t}\n\t\treturn nil, err\n\t}\n\tc.offset = 0\n\tc.results.resetPending()\n\tn, _ := b.build(c)\n\treturn n[0], nil\n}\n" diff --git a/internal/self/self.go b/internal/self/self.go index b6bf2e1..e031352 100644 --- a/internal/self/self.go +++ b/internal/self/self.go @@ -729,10 +729,11 @@ func (c *context) finalizeParse(root parser) error { } type Node struct { - Name string - Nodes []*Node - From, To int - tokens []rune + Name string + Nodes []*Node + From int + To int + tokens []rune } func (n *Node) Tokens() []rune { diff --git a/json_test.go b/json_test.go index 3d129e4..e4c57d7 100644 --- a/json_test.go +++ b/json_test.go @@ -285,7 +285,7 @@ func jsonTreeToJSON(n *Node) (interface{}, error) { } func TestJSON(t *testing.T) { - runTestsFile(t, "examples/json.treerack", []testItem{{ + runTestsFile(t, "docs/examples/json.treerack", []testItem{{ title: "true", text: "true", node: &Node{ @@ -509,7 +509,7 @@ func TestRandomJSON(t *testing.T) { buf := bytes.NewBuffer(b) - s, err := openSyntaxFile("examples/json.treerack") + s, err := openSyntaxFile("docs/examples/json.treerack") if err != nil { t.Error(err) return diff --git a/keyval_test.go b/keyval_test.go index 882c266..1f44e84 100644 --- a/keyval_test.go +++ b/keyval_test.go @@ -3,7 +3,7 @@ package treerack import "testing" func TestKeyVal(t *testing.T) { - runTestsFile(t, "examples/keyval.treerack", 
[]testItem{{ + runTestsFile(t, "docs/examples/keyval.treerack", []testItem{{ title: "empty", }, { title: "a comment", diff --git a/keyword_test.go b/keyword_test.go index c9fc243..d5f41ec 100644 --- a/keyword_test.go +++ b/keyword_test.go @@ -4,20 +4,53 @@ import "testing" func TestKeyword(t *testing.T) { const syntax = ` - keywords:kw = "foo" | "bar"; - symbol:nokw = [a-z]+; + space:ws = " "; + keyword:kw = "foo" | "bar"; + symbol:nokw:nows = [a-z]+; + doc:root = (keyword | symbol)*; ` runTests(t, syntax, []testItem{{ - title: "keyword", - text: "foo", - fail: true, - }, { title: "not keyword", text: "baz", ignorePosition: true, node: &Node{ - Name: "symbol", + Name: "doc", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, + }, { + title: "keyword", + text: "foo", + ignorePosition: true, + node: &Node{ + Name: "doc", + Nodes: []*Node{{ + Name: "keyword", + }}, + }, + }, { + title: "mixed", + text: "foo bar baz bar foo baz bar", + ignorePosition: true, + node: &Node{ + Name: "doc", + Nodes: []*Node{{ + Name: "keyword", + }, { + Name: "keyword", + }, { + Name: "symbol", + }, { + Name: "keyword", + }, { + Name: "keyword", + }, { + Name: "symbol", + }, { + Name: "keyword", + }}, }, }}) } diff --git a/mml_test.go b/mml_test.go index 3e427cc..5b873c3 100644 --- a/mml_test.go +++ b/mml_test.go @@ -3,7 +3,7 @@ package treerack import "testing" func TestMML(t *testing.T) { - s, err := openSyntaxFile("examples/mml.treerack") + s, err := openSyntaxFile("docs/examples/mml.treerack") if err != nil { t.Error(err) return diff --git a/mmlexp2_test.go b/mmlexp2_test.go index 165a54b..7b3a255 100644 --- a/mmlexp2_test.go +++ b/mmlexp2_test.go @@ -5,7 +5,7 @@ import ( ) func TestMMLExp2(t *testing.T) { - s, err := openSyntaxFile("examples/mml-exp2.treerack") + s, err := openSyntaxFile("docs/examples/mml-exp2.treerack") if err != nil { t.Error(err) return diff --git a/mmlexp3_test.go b/mmlexp3_test.go index b6f028e..e034318 100644 --- a/mmlexp3_test.go +++ b/mmlexp3_test.go @@ -5,7 
+5,7 @@ import ( ) func TestMMLExp3(t *testing.T) { - s, err := openSyntaxFile("examples/mml-exp3.treerack") + s, err := openSyntaxFile("docs/examples/mml-exp3.treerack") if err != nil { t.Error(err) return diff --git a/mmlexp_test.go b/mmlexp_test.go index 1e74ba0..5d766ef 100644 --- a/mmlexp_test.go +++ b/mmlexp_test.go @@ -9,7 +9,7 @@ import ( ) func TestMMLExp(t *testing.T) { - s, err := openSyntaxFile("examples/mml-exp.treerack") + s, err := openSyntaxFile("docs/examples/mml-exp.treerack") if err != nil { t.Error(err) return @@ -2987,7 +2987,7 @@ func TestMMLFile(t *testing.T) { const n = 180 - s, err := openSyntaxFile("examples/mml-exp.treerack") + s, err := openSyntaxFile("docs/examples/mml-exp.treerack") if err != nil { t.Error(err) return @@ -2995,7 +2995,7 @@ func TestMMLFile(t *testing.T) { s.Init() - f, err := os.Open("examples/test.mml") + f, err := os.Open("docs/examples/test.mml") if err != nil { t.Error(err) return diff --git a/nodehead.go b/nodehead.go index 76f28ec..d90d190 100644 --- a/nodehead.go +++ b/nodehead.go @@ -2,21 +2,39 @@ package treerack import "fmt" +// Node represents a distinct element in the resulting Abstract Syntax Tree (AST) following a successful parse. +// Every named parser that is not an Alias or Whitespace yields a Node. type Node struct { - Name string - Nodes []*Node - From, To int - tokens []rune + + // Name is the identifier of the parser that generated this node. + Name string + + // Nodes contains the child nodes representing the substructures of this node. + Nodes []*Node + + // From is the inclusive character offset of the starting position in the input stream. + From int + + // To is the exclusive character offset of the ending position in the input stream. + To int + + tokens []rune } +// Tokens returns the raw slice of runes from the input stream represented by this node. +// +// Note: This returns a reference to the underlying buffer, not a copy. It should not be modified. 
func (n *Node) Tokens() []rune { return n.tokens } +// String returns the string representation of the node, including its name, position range (From/To), and the +// captured text content. func (n *Node) String() string { return fmt.Sprintf("%s:%d:%d:%s", n.Name, n.From, n.To, n.Text()) } +// Text returns the actual string segment from the input stream represented by this node. func (n *Node) Text() string { return string(n.Tokens()[n.From:n.To]) } diff --git a/readme.md b/readme.md index 7f72307..4f5fe25 100644 --- a/readme.md +++ b/readme.md @@ -1,9 +1,65 @@ # treerack -[WIP] A generic parser generator for Go. +**A parser generator for Go.** -### Examples +Treerack defines and generates recursive descent parsers for arbitrary syntaxes, processing input content into +its Abstract Syntax Tree (AST) representation. It utilizes a custom syntax definition format derived from EBNF +(Extended Backus-Naur Form), allowing for clear and concise grammar descriptions. -- JSON: https://code.squareroundforest.org/arpio/treerack/blob/master/examples/json.treerack -- Scheme: https://code.squareroundforest.org/arpio/treerack/blob/master/examples/scheme.treerack -- Treerack (itself): https://code.squareroundforest.org/arpio/treerack/blob/master/syntax.treerack +## Examples + +- **JSON**: [docs/examples/json.treerack](docs/examples/json.treerack) +- **Scheme**: [docs/examples/scheme.treerack](docs/examples/scheme.treerack) +- **Treerack (self-definition)**: [syntax.treerack](syntax.treerack) + +## Overview + +Treerack operates without a separate lexing phase, parsing character streams directly to produce an AST. The +syntax language supports recursive references, enabling the definition of context-free grammars. + +We can define syntaxes during development and use the provided tool to generate static Go code, which is then +built into the application. Alternatively, the library supports loading syntaxes dynamically at runtime. 
+ +## Installation + +From source: + +``` +git clone https://code.squareroundforest.org/arpio/treerack +cd treerack +make install +``` + +Alternatively: + +``` +go install code.squareroundforest.org/arpio/treerack/cmd/treerack +``` + +## Documentation + +- [Manual](docs/manual.md): A guide to the main use cases supported by Treerack. +- [Syntax Definition](docs/syntax.md): Detailed reference for the Treerack definition language. +- [Library Documentation](https://godocs.io/code.squareroundforest.org/arpio/treerack): GoDoc reference for the + runtime library. + +## Developer Notes + +We use a Makefile to manage the build and verification lifecycle. + +Important: Generating the parser for the Treerack syntax itself (bootstrapping) requires multiple phases. +Consequently, running standard `go build` or `go test` commands may miss subtle consistency problems. + +The authoritative way to verify changes is via the Makefile: + +``` +make check +``` + +## Limitations + +- Lexer & UTF-8: Treerack does not require a lexer, which simplifies the architecture. However, this enforces + the use of UTF-8 input. We have considered support for custom tokenizers as a potential future improvement. +- Whitespace Delimited Languages: Due to the recursive descent nature and the lack of a dedicated lexer state, + defining whitespace-delimited syntaxes (such as Python-style indentation) can be difficult to achieve with the + current feature set. 
diff --git a/scheme_test.go b/scheme_test.go index 508770f..4194126 100644 --- a/scheme_test.go +++ b/scheme_test.go @@ -3,7 +3,7 @@ package treerack import "testing" func TestScheme(t *testing.T) { - runTestsFile(t, "examples/scheme.treerack", []testItem{{ + runTestsFile(t, "docs/examples/scheme.treerack", []testItem{{ title: "empty", }, { title: "a function", diff --git a/scripts/createhead.go b/scripts/createhead.go index fb94796..3d6094f 100644 --- a/scripts/createhead.go +++ b/scripts/createhead.go @@ -140,5 +140,10 @@ func main() { varName = "headCodeExported" } - fmt.Printf("package %s\n\n// generated with scripts/createhead.go\nconst %s=%s", packageName, varName, quotedCode) + fmt.Printf( + "package %s\n\n// generated with scripts/createhead.go\nconst %s=%s", + packageName, + varName, + quotedCode, + ) } diff --git a/sexpr_test.go b/sexpr_test.go index 5931b5d..c02f60e 100644 --- a/sexpr_test.go +++ b/sexpr_test.go @@ -3,7 +3,7 @@ package treerack import "testing" func TestSExpr(t *testing.T) { - runTestsFile(t, "examples/sexpr.treerack", []testItem{{ + runTestsFile(t, "docs/examples/sexpr.treerack", []testItem{{ title: "number", text: "42", nodes: []*Node{{ diff --git a/syntax.go b/syntax.go index 8fe298f..3a1efa2 100644 --- a/syntax.go +++ b/syntax.go @@ -1,3 +1,18 @@ +// Package treerack provides a parser generator for defining and interacting with arbitrary syntaxes. +// +// Treerack allows developers to define grammars — programmatically or via a syntax definition language +// derived from EBNF — and generate recursive descent parsers. These parsers process input content and produce +// an Abstract Syntax Tree (AST) representation. +// +// The library supports two primary workflows: +// +// 1. Dynamic (Runtime): Loading or defining syntaxes programmatically at runtime to parse input immediately. +// +// 2. Static (Generation): Defining syntaxes during development and generating Go source code to be compiled +// into the application. 
+// +// For detailed syntax definition rules and the command-line tool usage, please refer to the repository +// documentation: https://code.squareroundforest.org/arpio/treerack package treerack import ( @@ -7,14 +22,37 @@ import ( "io" ) -// if min=0&&max=0, it means min=1,max=1 -// else if max<=0, it means no max -// else if min<=0, it means no min +// SequenceItem represents a single element within a sequence definition, referencing another parser by name. +// +// Cardinality logic for SequenceItem: +// +// - If Min=0 and Max=0: Matches exactly once (equivalent to Min=1, Max=1). +// +// - If Max <= 0: Unbounded upper limit (matches Min or more times). +// +// - If Min <= 0: No lower limit (matches 0 to Max times). type SequenceItem struct { - Name string - Min, Max int + + // Name is the identifier of the referenced parser definition. + Name string + + // Min specifies the minimum required occurrences of the item. + Min int + + // Max specifies the maximum accepted occurrences of the item. + Max int } +// Syntax represents a complete grammar definition consisting of multiple named parsers. +// +// The lifecycle of a Syntax instance consists of three phases: +// +// 1. Definition: Define parsers using methods like AnyChar, Sequence, and Choice, or load a definition via +// ReadSyntax. +// +// 2. Initialization: Call Init() to validate definitions, resolve references, and seal the syntax. +// +// 3. Execution: Use Parse() to process input or Generate() to create Go source code. type Syntax struct { registry *registry initialized bool @@ -24,9 +62,15 @@ type Syntax struct { root definition } +// GeneratorOptions control the behavior of the Go code generator. type GeneratorOptions struct { + + // PackageName sets the package name for the generated source file. Defaults to main. PackageName string - Export bool + + // Export determines whether the generated Parse function is exported (public) or unexported (private) + // within the package. 
+ Export bool } // applied in a non-type-checked way @@ -51,11 +95,18 @@ type definition interface { } var ( - ErrSyntaxInitialized = errors.New("syntax initialized") - ErrNoParsersDefined = errors.New("no parsers defined") - ErrInvalidEscapeCharacter = errors.New("invalid escape character") - ErrMultipleRoots = errors.New("multiple roots") - ErrInvalidSymbolName = errors.New("invalid symbol name") + + // ErrSyntaxInitialized is returned when attempting to modify a syntax that has already been initialized. + ErrSyntaxInitialized = errors.New("syntax initialized") + + // ErrNoParsersDefined is returned when attempting to initialize a syntax containing no parser definitions. + ErrNoParsersDefined = errors.New("no parsers defined") + + // ErrMultipleRoots is returned when a syntax definition contains multiple explicit root parsers. + ErrMultipleRoots = errors.New("multiple roots") + + // ErrInvalidSymbolName is returned when a named parser is assigned an invalid identifier. + ErrInvalidSymbolName = errors.New("invalid symbol name") ) func (ct CommitType) String() string { @@ -193,6 +244,7 @@ func (s *Syntax) anyChar(name string, ct CommitType) error { return s.class(name, ct, true, nil, nil) } +// AnyChar registers a parser that accepts any single character (a wildcard). func (s *Syntax) AnyChar(name string, ct CommitType) error { if !isValidSymbol(name) { return ErrInvalidSymbolName @@ -223,6 +275,8 @@ func (s *Syntax) class(name string, ct CommitType, not bool, chars []rune, range return s.sequence(name, ct, SequenceItem{Name: cname}) } +// Class registers a character class parser, accepting characters defined in the specific list or ranges. If +// 'not' is true, it matches any character *except* those defined. 
func (s *Syntax) Class(name string, ct CommitType, not bool, chars []rune, ranges [][]rune) error { if !isValidSymbol(name) { return ErrInvalidSymbolName @@ -244,6 +298,7 @@ func (s *Syntax) charSequence(name string, ct CommitType, chars []rune) error { return s.sequence(name, ct|NoWhitespace, namesToSequenceItems(refs)...) } +// CharSequence registers a parser that matches a specific string literal (e.g., "foo"). func (s *Syntax) CharSequence(name string, ct CommitType, chars []rune) error { if !isValidSymbol(name) { return ErrInvalidSymbolName @@ -256,6 +311,7 @@ func (s *Syntax) sequence(name string, ct CommitType, items ...SequenceItem) err return s.register(newSequence(name, ct, items)) } +// Sequence registers a parser that matches a specific order of other named parsers (defined as SequenceItems). func (s *Syntax) Sequence(name string, ct CommitType, items ...SequenceItem) error { if !isValidSymbol(name) { return ErrInvalidSymbolName @@ -268,6 +324,7 @@ func (s *Syntax) choice(name string, ct CommitType, options ...string) error { return s.register(newChoice(name, ct, options)) } +// Choice registers a parser that matches exactly one of the provided named options. func (s *Syntax) Choice(name string, ct CommitType, options ...string) error { if !isValidSymbol(name) { return ErrInvalidSymbolName @@ -276,6 +333,7 @@ func (s *Syntax) Choice(name string, ct CommitType, options ...string) error { return s.choice(name, ct|userDefined, options...) } +// ReadSyntax loads a grammar definition from a reader using the Treerack syntax format. func (s *Syntax) ReadSyntax(r io.Reader) error { if s.initialized { return ErrSyntaxInitialized @@ -302,6 +360,8 @@ func (s *Syntax) ReadSyntax(r io.Reader) error { return define(s, n) } +// Init validates, initializes, and seals the syntax. It must succeed before the syntax can be used; Parse and +// Generate call it automatically when it has not been called explicitly. 
func (s *Syntax) Init() error { if s.errInitFailed != nil { return s.errInitFailed @@ -359,6 +419,7 @@ func (s *Syntax) keywordParsers() []parser { return p } +// Generate writes Go source code implementing the parser to the provided writer. func (s *Syntax) Generate(o GeneratorOptions, w io.Writer) error { if err := s.Init(); err != nil { return err @@ -454,6 +515,7 @@ func (s *Syntax) Generate(o GeneratorOptions, w io.Writer) error { return nil } +// Parse reads from the input stream and constructs an AST based on the defined syntax. func (s *Syntax) Parse(r io.Reader) (*Node, error) { if err := s.Init(); err != nil { return nil, err diff --git a/syntaxhead.go b/syntaxhead.go index 0173f36..10e4890 100644 --- a/syntaxhead.go +++ b/syntaxhead.go @@ -7,16 +7,39 @@ import ( "io" ) +// CommitType controls how the output of a named parser is handled and represented in the resulting AST. type CommitType int const ( - None CommitType = 0 + + // None indicates the default behavior: parsed segments are represented as named nodes in the AST. + // Whitespace handling inherits the syntax's global settings. + None CommitType = 0 + + // Alias treats the parser as a pass-through. Validated segments are included in the AST node of the + // enclosing parser rather than creating a distinct node. Alias CommitType = 1 << iota + + // Whitespace designates a parser (typically a character sequence) to be treated as ignored whitespace + // throughout the input. Whitespace + + // NoWhitespace prevents the automatic skipping of defined whitespace characters within specific sequences. NoWhitespace + + // Keyword marks a sequence as a reserved keyword. This allows specific sequences to be protected or + // restricted in certain contexts via the NoKeyword flag. Keyword + + // NoKeyword prevents the parser from matching sequences marked as Keywords. NoKeyword + + // FailPass configures the parser to propagate failure up to the enclosing parser rather than handling it + // locally. 
FailPass + + // Root explicitly marks the parser as the root of the syntax. By default, the last defined parser is + // considered the root. Root userDefined @@ -30,30 +53,24 @@ const ( formatIncludeComments ) -// ParseError is returned when the input text doesn't match -// the used syntax during parsing. +// ParseError reports a failure to match the input text against the defined syntax. type ParseError struct { - // Input is the name of the input file or if not - // available. + // Input denotes the name of the input source (e.g., filename), or "" if unavailable. Input string - // Offset is the index of the right-most failing - // token in the input text. + // Offset is the index of the right-most token where the parse failed. Offset int - // Line tells the line index of the right-most failing - // token in the input text. + // Line is the zero-based line number of the failure position. // - // It is zero-based, and for error reporting, it is - // recommended to increment it by one. + // For display purposes, increment by one. Line int - // Column tells the column index of the right-most failing - // token in the input text. + // Column is the zero-based column index of the failure position. Column int - // Definition tells the right-most unmatched parser definition. + // Definition identifies the name of the specific parser definition where the match failed. Definition string } @@ -70,8 +87,10 @@ type builder interface { build(*context) ([]*Node, bool) } +// ErrInvalidUnicodeCharacter indicates that the input content contains invalid UTF-8 sequences. var ErrInvalidUnicodeCharacter = errors.New("invalid unicode character") +// Error returns the formatted failure message. func (pe *ParseError) Error() string { return fmt.Sprintf( "%s:%d:%d:parse failed, parsing: %s",