1
0

trace parse errors

This commit is contained in:
Arpad Ryszka 2026-06-06 21:19:04 +02:00
parent 5f469c15ac
commit 752e4d2275
20 changed files with 529 additions and 78 deletions

View File

@ -21,11 +21,13 @@ func (p *choiceParser) nodeID() int { return p.id }
func (p *choiceParser) commitType() CommitType { return p.commit } func (p *choiceParser) commitType() CommitType { return p.commit }
func (p *choiceParser) parse(c *context) { func (p *choiceParser) parse(c *context) {
c.trace(p, c.offset, c.offset, Enter)
if c.fromResults(p) { if c.fromResults(p) {
return return
} }
if c.results.pending(c.offset, p.id) { if c.results.pending(c.offset, p.id) {
c.trace(p, c.offset, c.offset, Fail, "same position recursion")
c.fail(c.offset) c.fail(c.offset)
return return
} }
@ -44,15 +46,12 @@ func (p *choiceParser) parse(c *context) {
initialFailOffset := c.failOffset initialFailOffset := c.failOffset
initialFailingParser := c.failingParser initialFailingParser := c.failingParser
failOffset := initialFailOffset failOffset := initialFailOffset
for { for {
foundMatch = false foundMatch = false
optionIndex = 0 optionIndex = 0
for optionIndex < len(p.options) { for optionIndex < len(p.options) {
p.options[optionIndex].parse(c) p.options[optionIndex].parse(c)
optionIndex++ optionIndex++
if !c.matchLast { if !c.matchLast {
if c.failOffset > failOffset { if c.failOffset > failOffset {
failOffset = c.failOffset failOffset = c.failOffset
@ -86,6 +85,7 @@ func (p *choiceParser) parse(c *context) {
c.failingParser = p c.failingParser = p
} }
c.trace(p, from, to, Fail, "illegal keyword")
c.fail(from) c.fail(from)
c.results.unmarkPending(from, p.id) c.results.unmarkPending(from, p.id)
return return
@ -102,6 +102,7 @@ func (p *choiceParser) parse(c *context) {
c.failingParser = initialFailingParser c.failingParser = initialFailingParser
} }
c.trace(p, from, to, Success)
c.success(to) c.success(to)
c.results.unmarkPending(from, p.id) c.results.unmarkPending(from, p.id)
return return
@ -119,6 +120,7 @@ func (p *choiceParser) parse(c *context) {
} }
c.results.setNoMatch(from, p.id) c.results.setNoMatch(from, p.id)
c.trace(p, from, to, Fail, "no match")
c.fail(from) c.fail(from)
c.results.unmarkPending(from, p.id) c.results.unmarkPending(from, p.id)
} }

View File

@ -192,7 +192,6 @@ func (p *choiceParser) generate(w io.Writer, done map[string]bool) error {
fprintf("var p%d = choiceParser{", p.id) fprintf("var p%d = choiceParser{", p.id)
fprintf("id: %d, commit: %d,", p.id, p.commit) fprintf("id: %d, commit: %d,", p.id, p.commit)
if p.commitType()&userDefined != 0 { if p.commitType()&userDefined != 0 {
fprintf("name: \"%s\",", p.name) fprintf("name: \"%s\",", p.name)
} }

View File

@ -2,7 +2,9 @@ package main
import ( import (
"code.squareroundforest.org/arpio/treerack" "code.squareroundforest.org/arpio/treerack"
"errors"
"io" "io"
"os"
) )
type checkOptions struct { type checkOptions struct {
@ -38,6 +40,10 @@ func check(o checkOptions, stdin io.Reader, args ...string) error {
defer finalizeInput() defer finalizeInput()
s := &treerack.Syntax{} s := &treerack.Syntax{}
if err := s.ReadSyntax(syntax); err != nil { if err := s.ReadSyntax(syntax); err != nil {
if terr := treerack.Trace(os.Stderr, err); terr != nil {
err = errors.Join(err, terr)
}
return err return err
} }
@ -45,6 +51,13 @@ func check(o checkOptions, stdin io.Reader, args ...string) error {
return err return err
} }
_, err = s.Parse(input) if _, err := s.Parse(input); err != nil {
if terr := treerack.Trace(os.Stderr, err); terr != nil {
err = errors.Join(err, terr)
}
return err return err
}
return nil
} }

View File

@ -2,7 +2,9 @@ package main
import ( import (
"code.squareroundforest.org/arpio/treerack" "code.squareroundforest.org/arpio/treerack"
"errors"
"io" "io"
"os"
) )
type checkSyntaxOptions struct { type checkSyntaxOptions struct {
@ -25,6 +27,10 @@ func checkSyntax(o checkSyntaxOptions, stdin io.Reader, args ...string) error {
defer finalize() defer finalize()
s := &treerack.Syntax{} s := &treerack.Syntax{}
if err := s.ReadSyntax(syntax); err != nil { if err := s.ReadSyntax(syntax); err != nil {
if terr := treerack.Trace(os.Stderr, err); terr != nil {
err = errors.Join(err, terr)
}
return err return err
} }

View File

@ -18,6 +18,10 @@ type formatOptions struct {
func formatSyntax(in io.Reader, out io.Writer) error { func formatSyntax(in io.Reader, out io.Writer) error {
s := new(treerack.Syntax) s := new(treerack.Syntax)
if err := s.ReadSyntax(in); err != nil { if err := s.ReadSyntax(in); err != nil {
// Emit the trace for err to stderr. Only when Trace itself fails is its error joined to the parse
// error; otherwise err is returned unchanged.
if terr := treerack.Trace(os.Stderr, err); terr != nil {
	err = errors.Join(err, terr)
}
return err return err
} }

View File

@ -2,7 +2,9 @@ package main
import ( import (
"code.squareroundforest.org/arpio/treerack" "code.squareroundforest.org/arpio/treerack"
"errors"
"io" "io"
"os"
) )
type generateOptions struct { type generateOptions struct {
@ -34,6 +36,10 @@ func generate(o generateOptions, stdin io.Reader, stdout io.Writer, args ...stri
defer finalizeSyntax() defer finalizeSyntax()
s := &treerack.Syntax{} s := &treerack.Syntax{}
if err := s.ReadSyntax(syntax); err != nil { if err := s.ReadSyntax(syntax); err != nil {
// Emit the trace for err to stderr. Only when Trace itself fails is its error joined to the parse
// error; otherwise err is returned unchanged.
if terr := treerack.Trace(os.Stderr, err); terr != nil {
	err = errors.Join(err, terr)
}
return err return err
} }

View File

@ -3,7 +3,9 @@ package main
import ( import (
"code.squareroundforest.org/arpio/treerack" "code.squareroundforest.org/arpio/treerack"
"encoding/json" "encoding/json"
"errors"
"io" "io"
"os"
) )
type showOptions struct { type showOptions struct {
@ -72,6 +74,10 @@ func show(o showOptions, stdin io.Reader, stdout io.Writer, args ...string) erro
defer finalizeInput() defer finalizeInput()
s := &treerack.Syntax{} s := &treerack.Syntax{}
if err := s.ReadSyntax(syntax); err != nil { if err := s.ReadSyntax(syntax); err != nil {
// Emit the trace for the syntax read error to stderr. Only when Trace itself fails is its error
// joined to the original error; otherwise err is returned unchanged.
if terr := treerack.Trace(os.Stderr, err); terr != nil {
	err = errors.Join(err, terr)
}
return err return err
} }
@ -81,6 +87,10 @@ func show(o showOptions, stdin io.Reader, stdout io.Writer, args ...string) erro
n, err := s.Parse(input) n, err := s.Parse(input)
if err != nil { if err != nil {
// Emit the trace for the input parse error to stderr. Only when Trace itself fails is its error
// joined to the original error; otherwise err is returned unchanged.
if terr := treerack.Trace(os.Stderr, err); terr != nil {
	err = errors.Join(err, terr)
}
return err return err
} }

View File

@ -7,6 +7,8 @@ import (
"unicode" "unicode"
) )
const maxTraceEntries = 36
type context struct { type context struct {
reader io.RuneReader reader io.RuneReader
keywords []parser keywords []parser
@ -21,6 +23,8 @@ type context struct {
results *results results *results
tokens []rune tokens []rune
matchLast bool matchLast bool
level int
tr []TraceEntry
} }
func newContext(r io.RuneReader, keywords []parser) *context { func newContext(r io.RuneReader, keywords []parser) *context {
@ -136,7 +140,7 @@ func findLine(tokens []rune, offset int) (line, column int) {
return return
} }
func (c *context) parseError(p parser) error { func (c *context) parseError(p parser, unexpectedInput bool, root int) error {
definition := p.nodeName() definition := p.nodeName()
flagIndex := strings.Index(definition, ":") flagIndex := strings.Index(definition, ":")
if flagIndex > 0 { if flagIndex > 0 {
@ -148,11 +152,26 @@ func (c *context) parseError(p parser) error {
} }
line, col := findLine(c.tokens, c.failOffset) line, col := findLine(c.tokens, c.failOffset)
ueLine, ueCol := -1, -1
if unexpectedInput {
to, _, _ := c.results.longestResult(0, root)
ueLine, ueCol = findLine(c.tokens, to)
}
for i := range c.tr {
c.tr[i].FromLine, c.tr[i].FromCol = findLine(c.tokens, c.tr[i].From)
c.tr[i].ToLine, c.tr[i].ToCol = findLine(c.tokens, c.tr[i].To)
}
return &ParseError{ return &ParseError{
inputContent: c.tokens,
Offset: c.failOffset, Offset: c.failOffset,
Line: line, Line: line,
Column: col, Column: col,
Definition: definition, Definition: definition,
Trace: c.tr,
UnexpectedInputLine: ueLine,
UnexpectedInputCol: ueCol,
} }
} }
@ -164,7 +183,7 @@ func (c *context) finalizeParse(root parser) error {
to, match, found := c.results.longestResult(0, root.nodeID()) to, match, found := c.results.longestResult(0, root.nodeID())
if !found || !match || found && match && to < c.readOffset { if !found || !match || found && match && to < c.readOffset {
return c.parseError(fp) return c.parseError(fp, found && match && to < c.readOffset, root.nodeID())
} }
c.read() c.read()
@ -176,5 +195,34 @@ func (c *context) finalizeParse(root parser) error {
return c.readErr return c.readErr
} }
return c.parseError(root) return c.parseError(root, false, root.nodeID())
}
func (c *context) trace(p parser, from, to int, event TraceEvent, reason ...string) {
if p.commitType()&userDefined == 0 || p.commitType()&FailPass != 0 {
return
}
if len(c.tr) == maxTraceEntries {
c.tr = c.tr[1:]
}
switch event {
case Success, Fail:
c.level--
}
c.tr = append(c.tr, TraceEntry{
Level: c.level,
Parser: p.nodeName(),
From: from,
To: to,
Event: event,
Reason: strings.Join(reason, "; "),
})
switch event {
case Enter:
c.level++
}
} }

View File

@ -4,10 +4,14 @@
digit:alias:failpass = [0-9]; digit:alias:failpass = [0-9];
hex:alias:failpass = [0-9a-fA-F]; hex:alias:failpass = [0-9a-fA-F];
alpha:alias:failpass = [a-zA-Z]; alpha:alias:failpass = [a-zA-Z];
delimiter:alias:failpass = ":" | "/" | "?" | "#" | "[" | "]" | "@";
// delimiter:
// delimiter:alias:failpass = ":" | "/" | "?" | "#" | "[" | "]" | "@";
subdelimiter:alias:failpass = "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "="; subdelimiter:alias:failpass = "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "=";
unreserved:alias:failpass = alpha | digit | "-" | "." | "_" | "~"; unreserved:alias:failpass = alpha | digit | "-" | "." | "_" | "~";
reserved:alias:failpass = delimiter | subdelimiter;
// reserved:
// reserved:alias:failpass = delimiter | subdelimiter;
percent-encoded:alias:failpass = "%" hex{2}; percent-encoded:alias:failpass = "%" hex{2};
path-char:alias:failpass = unreserved | percent-encoded | subdelimiter | ":" | "@"; path-char:alias:failpass = unreserved | percent-encoded | subdelimiter | ":" | "@";

View File

@ -26,6 +26,16 @@ func testParseErrorItem(s *Syntax, test errorTestItem) func(t *testing.T) {
} }
perr.Input = "" perr.Input = ""
perr.inputContent = nil
if len(test.perr.Trace) == 0 {
perr.Trace = nil
}
if test.perr.UnexpectedInputLine == 0 && test.perr.UnexpectedInputCol == 0 &&
perr.UnexpectedInputLine == -1 && perr.UnexpectedInputCol == -1 {
test.perr.UnexpectedInputLine = -1
test.perr.UnexpectedInputCol = -1
}
if !reflect.DeepEqual(*perr, test.perr) { if !reflect.DeepEqual(*perr, test.perr) {
t.Error("invalid error returned") t.Error("invalid error returned")
@ -93,6 +103,8 @@ func TestError(t *testing.T) {
Line: 1, Line: 1,
Column: 1, Column: 1,
Definition: "a", Definition: "a",
UnexpectedInputLine: 1,
UnexpectedInputCol: 1,
}, },
}, { }, {
title: "multiple definitions", title: "multiple definitions",
@ -120,6 +132,7 @@ func TestError(t *testing.T) {
Offset: 1, Offset: 1,
Column: 1, Column: 1,
Definition: "c", Definition: "c",
UnexpectedInputCol: 1,
}, },
}, { }, {
title: "choice fails", title: "choice fails",
@ -194,6 +207,7 @@ func TestErrorRecursive(t *testing.T) {
Offset: 2, Offset: 2,
Column: 2, Column: 2,
Definition: "function-application", Definition: "function-application",
UnexpectedInputCol: 1,
}, },
}, { }, {
title: "simple, close", title: "simple, close",
@ -202,6 +216,7 @@ func TestErrorRecursive(t *testing.T) {
Offset: 1, Offset: 1,
Column: 1, Column: 1,
Definition: "function-application", Definition: "function-application",
UnexpectedInputCol: 1,
}, },
}, { }, {
title: "inner, open", title: "inner, open",
@ -210,6 +225,7 @@ func TestErrorRecursive(t *testing.T) {
Offset: 5, Offset: 5,
Column: 5, Column: 5,
Definition: "function-application", Definition: "function-application",
UnexpectedInputCol: 1,
}, },
}, { }, {
title: "inner, close", title: "inner, close",
@ -218,6 +234,7 @@ func TestErrorRecursive(t *testing.T) {
Offset: 4, Offset: 4,
Column: 4, Column: 4,
Definition: "function-application", Definition: "function-application",
UnexpectedInputCol: 4,
}, },
}, { }, {
title: "outer, open", title: "outer, open",
@ -226,6 +243,7 @@ func TestErrorRecursive(t *testing.T) {
Offset: 5, Offset: 5,
Column: 5, Column: 5,
Definition: "function-application", Definition: "function-application",
UnexpectedInputCol: 4,
}, },
}, { }, {
title: "outer, close", title: "outer, close",
@ -234,19 +252,21 @@ func TestErrorRecursive(t *testing.T) {
Offset: 4, Offset: 4,
Column: 4, Column: 4,
Definition: "function-application", Definition: "function-application",
UnexpectedInputCol: 4,
}, },
}}) }})
} }
func TestErrorMessage(t *testing.T) { func TestErrorMessage(t *testing.T) {
const expected = "foo:4:10:parse failed, parsing: bar" const expected = "foo:4:10:parse failed, parsing: bar, at 4:10"
perr := &ParseError{ perr := &ParseError{
Input: "foo", Input: "foo",
Offset: 42, Offset: 42,
Line: 3, Line: 3,
Column: 9, Column: 9,
Definition: "bar", Definition: "bar",
UnexpectedInputLine: -1,
UnexpectedInputCol: -1,
} }
message := perr.Error() message := perr.Error()
@ -279,6 +299,7 @@ func TestLongestFail(t *testing.T) {
Line: 0, Line: 0,
Column: 4, Column: 4,
Definition: "function-application", Definition: "function-application",
UnexpectedInputCol: 1,
}, },
}}) }})
} }

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -116,8 +116,10 @@ func (p *sequenceParser) commitType() CommitType {
return p.commit return p.commit
} }
func (p *sequenceParser) parse(c *context) { func (p *sequenceParser) parse(c *context) {
c.trace(p, c.offset, c.offset, Enter)
if !p.allChars { if !p.allChars {
if c.results.pending(c.offset, p.id) { if c.results.pending(c.offset, p.id) {
c.trace(p, c.offset, c.offset, Fail, "same position recursion")
c.fail(c.offset) c.fail(c.offset)
return return
} }
@ -151,6 +153,7 @@ func (p *sequenceParser) parse(c *context) {
if c.failingParser == nil && p.commit&userDefined != 0 && p.commit&Whitespace == 0 && p.commit&FailPass == 0 { if c.failingParser == nil && p.commit&userDefined != 0 && p.commit&Whitespace == 0 && p.commit&FailPass == 0 {
c.failingParser = p c.failingParser = p
} }
c.trace(p, from, to, Fail, "no match")
c.fail(from) c.fail(from)
if !p.allChars { if !p.allChars {
c.results.unmarkPending(from, p.id) c.results.unmarkPending(from, p.id)
@ -171,6 +174,7 @@ func (p *sequenceParser) parse(c *context) {
if c.failingParser == nil && p.commit&userDefined != 0 && p.commit&Whitespace == 0 && p.commit&FailPass == 0 { if c.failingParser == nil && p.commit&userDefined != 0 && p.commit&Whitespace == 0 && p.commit&FailPass == 0 {
c.failingParser = p c.failingParser = p
} }
c.trace(p, from, to, Fail, "illegal keyword")
c.fail(from) c.fail(from)
if !p.allChars { if !p.allChars {
c.results.unmarkPending(from, p.id) c.results.unmarkPending(from, p.id)
@ -187,6 +191,7 @@ func (p *sequenceParser) parse(c *context) {
c.failingParser = nil c.failingParser = nil
} }
c.results.setMatch(from, p.id, to) c.results.setMatch(from, p.id, to)
c.trace(p, from, to, Success)
c.success(to) c.success(to)
if !p.allChars { if !p.allChars {
c.results.unmarkPending(from, p.id) c.results.unmarkPending(from, p.id)
@ -292,10 +297,12 @@ func (p *choiceParser) commitType() CommitType {
return p.commit return p.commit
} }
func (p *choiceParser) parse(c *context) { func (p *choiceParser) parse(c *context) {
c.trace(p, c.offset, c.offset, Enter)
if c.fromResults(p) { if c.fromResults(p) {
return return
} }
if c.results.pending(c.offset, p.id) { if c.results.pending(c.offset, p.id) {
c.trace(p, c.offset, c.offset, Fail, "same position recursion")
c.fail(c.offset) c.fail(c.offset)
return return
} }
@ -342,6 +349,7 @@ func (p *choiceParser) parse(c *context) {
if c.failingParser == nil && p.commit&userDefined != 0 && p.commit&Whitespace == 0 && p.commit&FailPass == 0 { if c.failingParser == nil && p.commit&userDefined != 0 && p.commit&Whitespace == 0 && p.commit&FailPass == 0 {
c.failingParser = p c.failingParser = p
} }
c.trace(p, from, to, Fail, "illegal keyword")
c.fail(from) c.fail(from)
c.results.unmarkPending(from, p.id) c.results.unmarkPending(from, p.id)
return return
@ -356,6 +364,7 @@ func (p *choiceParser) parse(c *context) {
c.failOffset = initialFailOffset c.failOffset = initialFailOffset
c.failingParser = initialFailingParser c.failingParser = initialFailingParser
} }
c.trace(p, from, to, Success)
c.success(to) c.success(to)
c.results.unmarkPending(from, p.id) c.results.unmarkPending(from, p.id)
return return
@ -368,6 +377,7 @@ func (p *choiceParser) parse(c *context) {
} }
} }
c.results.setNoMatch(from, p.id) c.results.setNoMatch(from, p.id)
c.trace(p, from, to, Fail, "no match")
c.fail(from) c.fail(from)
c.results.unmarkPending(from, p.id) c.results.unmarkPending(from, p.id)
} }
@ -593,6 +603,8 @@ func (r *results) unmarkPending(offset, id int) {
} }
} }
const maxTraceEntries = 36
type context struct { type context struct {
reader io.RuneReader reader io.RuneReader
keywords []parser keywords []parser
@ -607,6 +619,8 @@ type context struct {
results *results results *results
tokens []rune tokens []rune
matchLast bool matchLast bool
level int
tr []TraceEntry
} }
func newContext(r io.RuneReader, keywords []parser) *context { func newContext(r io.RuneReader, keywords []parser) *context {
@ -699,7 +713,7 @@ func findLine(tokens []rune, offset int) (line, column int) {
} }
return return
} }
func (c *context) parseError(p parser) error { func (c *context) parseError(p parser, unexpectedInput bool, root int) error {
definition := p.nodeName() definition := p.nodeName()
flagIndex := strings.Index(definition, ":") flagIndex := strings.Index(definition, ":")
if flagIndex > 0 { if flagIndex > 0 {
@ -709,7 +723,16 @@ func (c *context) parseError(p parser) error {
c.failOffset = c.consumed c.failOffset = c.consumed
} }
line, col := findLine(c.tokens, c.failOffset) line, col := findLine(c.tokens, c.failOffset)
return &ParseError{Offset: c.failOffset, Line: line, Column: col, Definition: definition} ueLine, ueCol := -1, -1
if unexpectedInput {
to, _, _ := c.results.longestResult(0, root)
ueLine, ueCol = findLine(c.tokens, to)
}
for i := range c.tr {
c.tr[i].FromLine, c.tr[i].FromCol = findLine(c.tokens, c.tr[i].From)
c.tr[i].ToLine, c.tr[i].ToCol = findLine(c.tokens, c.tr[i].To)
}
return &ParseError{inputContent: c.tokens, Offset: c.failOffset, Line: line, Column: col, Definition: definition, Trace: c.tr, UnexpectedInputLine: ueLine, UnexpectedInputCol: ueCol}
} }
func (c *context) finalizeParse(root parser) error { func (c *context) finalizeParse(root parser) error {
fp := c.failingParser fp := c.failingParser
@ -718,7 +741,7 @@ func (c *context) finalizeParse(root parser) error {
} }
to, match, found := c.results.longestResult(0, root.nodeID()) to, match, found := c.results.longestResult(0, root.nodeID())
if !found || !match || found && match && to < c.readOffset { if !found || !match || found && match && to < c.readOffset {
return c.parseError(fp) return c.parseError(fp, found && match && to < c.readOffset, root.nodeID())
} }
c.read() c.read()
if c.eof { if c.eof {
@ -727,7 +750,24 @@ func (c *context) finalizeParse(root parser) error {
if c.readErr != nil { if c.readErr != nil {
return c.readErr return c.readErr
} }
return c.parseError(root) return c.parseError(root, false, root.nodeID())
}
func (c *context) trace(p parser, from, to int, event TraceEvent, reason ...string) {
if p.commitType()&userDefined == 0 || p.commitType()&FailPass != 0 {
return
}
if len(c.tr) == maxTraceEntries {
c.tr = c.tr[1:]
}
switch event {
case Success, Fail:
c.level--
}
c.tr = append(c.tr, TraceEntry{Level: c.level, Parser: p.nodeName(), From: from, To: to, Event: event, Reason: strings.Join(reason, "; ")})
switch event {
case Enter:
c.level++
}
} }
type Node struct { type Node struct {
@ -775,12 +815,36 @@ type formatOptions struct {
mode formatFlags mode formatFlags
targetWidth int targetWidth int
} }
type TraceEvent int
const (
Enter TraceEvent = iota
Success
Fail
)
type TraceEntry struct {
Level int
Parser string
From int
To int
FromLine int
FromCol int
ToLine int
ToCol int
Event TraceEvent
Reason string
}
type ParseError struct { type ParseError struct {
inputContent []rune
Input string Input string
Offset int Offset int
Line int Line int
Column int Column int
Definition string Definition string
Trace []TraceEntry
UnexpectedInputLine int
UnexpectedInputCol int
} }
type parser interface { type parser interface {
nodeName() string nodeName() string
@ -796,8 +860,22 @@ type builder interface {
var ErrInvalidUnicodeCharacter = errors.New("invalid unicode character") var ErrInvalidUnicodeCharacter = errors.New("invalid unicode character")
func (pe *ParseError) InputContent(from, to int) []rune {
if to < 0 {
to = len(pe.inputContent)
}
if to <= from {
return nil
}
c := make([]rune, to-from)
copy(c, pe.inputContent[from:])
return c
}
func (pe *ParseError) Error() string { func (pe *ParseError) Error() string {
return fmt.Sprintf("%s:%d:%d:parse failed, parsing: %s", pe.Input, pe.Line+1, pe.Column+1, pe.Definition) if pe.UnexpectedInputLine >= 0 && pe.UnexpectedInputCol >= 0 {
return fmt.Sprintf("%s:%d:%d:parse failed, unexpected input at %d:%d", pe.Input, pe.UnexpectedInputLine+1, pe.UnexpectedInputCol+1, pe.UnexpectedInputLine+1, pe.UnexpectedInputCol+1)
}
return fmt.Sprintf("%s:%d:%d:parse failed, parsing: %s, at %d:%d", pe.Input, pe.Line+1, pe.Column+1, pe.Definition, pe.Line+1, pe.Column+1)
} }
func parseInput(r io.Reader, p parser, b builder, kw []parser) (Node, error) { func parseInput(r io.Reader, p parser, b builder, kw []parser) (Node, error) {
c := newContext(bufio.NewReader(r), kw) c := newContext(bufio.NewReader(r), kw)
@ -821,7 +899,7 @@ func parseInput(r io.Reader, p parser, b builder, kw []parser) (Node, error) {
func Parse(r io.Reader) (Node, error) { func Parse(r io.Reader) (Node, error) {
var p183 = sequenceParser{id: 183, commit: 256, ranges: [][]int{{0, -1}, {1, 1}, {0, -1}}} var p183 = sequenceParser{id: 183, commit: 768, name: "syntax", ranges: [][]int{{0, -1}, {1, 1}, {0, -1}}}
var p181 = choiceParser{id: 181, commit: 2} var p181 = choiceParser{id: 181, commit: 2}
var p180 = choiceParser{id: 180, commit: 518, name: "wsc", generalizations: []int{181}} var p180 = choiceParser{id: 180, commit: 518, name: "wsc", generalizations: []int{181}}
var p2 = sequenceParser{id: 2, commit: 578, name: "wschar", allChars: true, ranges: [][]int{{1, 1}}, generalizations: []int{180, 181}} var p2 = sequenceParser{id: 2, commit: 578, name: "wschar", allChars: true, ranges: [][]int{{1, 1}}, generalizations: []int{180, 181}}
@ -873,7 +951,7 @@ func Parse(r io.Reader) (Node, error) {
p30.items = []parser{&p26, &p29} p30.items = []parser{&p26, &p29}
p180.options = []parser{&p2, &p30} p180.options = []parser{&p2, &p30}
p181.options = []parser{&p180} p181.options = []parser{&p180}
var p182 = sequenceParser{id: 182, commit: 514, name: "syntax:wsroot", ranges: [][]int{{0, 1}, {0, -1}, {0, 1}, {0, 1}}} var p182 = sequenceParser{id: 182, commit: 66, ranges: [][]int{{0, 1}, {0, -1}, {0, 1}, {0, 1}}}
var p177 = sequenceParser{id: 177, commit: 2, ranges: [][]int{{1, 1}, {0, -1}}} var p177 = sequenceParser{id: 177, commit: 2, ranges: [][]int{{1, 1}, {0, -1}}}
var p173 = sequenceParser{id: 173, commit: 10, allChars: true, ranges: [][]int{{1, 1}, {1, 1}}} var p173 = sequenceParser{id: 173, commit: 10, allChars: true, ranges: [][]int{{1, 1}, {1, 1}}}
var p172 = charParser{id: 172, chars: []rune{59}} var p172 = charParser{id: 172, chars: []rune{59}}
@ -1104,7 +1182,7 @@ func Parse(r io.Reader) (Node, error) {
p179.items = []parser{&p181, &p175, &p178} p179.items = []parser{&p181, &p175, &p178}
p182.items = []parser{&p177, &p181, &p171, &p179} p182.items = []parser{&p177, &p181, &p171, &p179}
p183.items = []parser{&p181, &p182, &p181} p183.items = []parser{&p181, &p182, &p181}
var b183 = sequenceBuilder{id: 183, commit: 256, name: "syntax", ranges: [][]int{{0, -1}, {1, 1}, {0, -1}}} var b183 = sequenceBuilder{id: 183, commit: 768, name: "syntax", ranges: [][]int{{0, -1}, {1, 1}, {0, -1}}}
var b181 = choiceBuilder{id: 181, commit: 2} var b181 = choiceBuilder{id: 181, commit: 2}
var b180 = choiceBuilder{id: 180, commit: 518, generalizations: []int{181}} var b180 = choiceBuilder{id: 180, commit: 518, generalizations: []int{181}}
var b2 = sequenceBuilder{id: 2, commit: 578, allChars: true, ranges: [][]int{{1, 1}}, generalizations: []int{180, 181}} var b2 = sequenceBuilder{id: 2, commit: 578, allChars: true, ranges: [][]int{{1, 1}}, generalizations: []int{180, 181}}
@ -1156,7 +1234,7 @@ func Parse(r io.Reader) (Node, error) {
b30.items = []builder{&b26, &b29} b30.items = []builder{&b26, &b29}
b180.options = []builder{&b2, &b30} b180.options = []builder{&b2, &b30}
b181.options = []builder{&b180} b181.options = []builder{&b180}
var b182 = sequenceBuilder{id: 182, commit: 514, ranges: [][]int{{0, 1}, {0, -1}, {0, 1}, {0, 1}}} var b182 = sequenceBuilder{id: 182, commit: 66, ranges: [][]int{{0, 1}, {0, -1}, {0, 1}, {0, 1}}}
var b177 = sequenceBuilder{id: 177, commit: 2, ranges: [][]int{{1, 1}, {0, -1}}} var b177 = sequenceBuilder{id: 177, commit: 2, ranges: [][]int{{1, 1}, {0, -1}}}
var b173 = sequenceBuilder{id: 173, commit: 10, allChars: true, ranges: [][]int{{1, 1}, {1, 1}}} var b173 = sequenceBuilder{id: 173, commit: 10, allChars: true, ranges: [][]int{{1, 1}, {1, 1}}}
var b172 = charBuilder{} var b172 = charBuilder{}

View File

@ -110,6 +110,7 @@ check-generate: .build/head.gen.go .build/headexported.gen.go .build/self.gen.go
check: $(sources) $(parsers) build check-generate check: $(sources) $(parsers) build check-generate
go test go test
go test ./cmd/treerack go test ./cmd/treerack
set -e; for p in $(parsers); do .build/treerack check-syntax $$p; done
.coverprofile: $(sources) .coverprofile: $(sources)
go test -coverprofile .coverprofile go test -coverprofile .coverprofile

View File

@ -25,8 +25,10 @@ func (p *sequenceParser) nodeID() int { return p.id }
func (p *sequenceParser) commitType() CommitType { return p.commit } func (p *sequenceParser) commitType() CommitType { return p.commit }
func (p *sequenceParser) parse(c *context) { func (p *sequenceParser) parse(c *context) {
c.trace(p, c.offset, c.offset, Enter)
if !p.allChars { if !p.allChars {
if c.results.pending(c.offset, p.id) { if c.results.pending(c.offset, p.id) {
c.trace(p, c.offset, c.offset, Fail, "same position recursion")
c.fail(c.offset) c.fail(c.offset)
return return
} }
@ -71,6 +73,7 @@ func (p *sequenceParser) parse(c *context) {
c.failingParser = p c.failingParser = p
} }
c.trace(p, from, to, Fail, "no match")
c.fail(from) c.fail(from)
if !p.allChars { if !p.allChars {
c.results.unmarkPending(from, p.id) c.results.unmarkPending(from, p.id)
@ -99,6 +102,7 @@ func (p *sequenceParser) parse(c *context) {
c.failingParser = p c.failingParser = p
} }
c.trace(p, from, to, Fail, "illegal keyword")
c.fail(from) c.fail(from)
if !p.allChars { if !p.allChars {
c.results.unmarkPending(from, p.id) c.results.unmarkPending(from, p.id)
@ -119,6 +123,7 @@ func (p *sequenceParser) parse(c *context) {
} }
c.results.setMatch(from, p.id, to) c.results.setMatch(from, p.id, to)
c.trace(p, from, to, Success)
c.success(to) c.success(to)
if !p.allChars { if !p.allChars {
c.results.unmarkPending(from, p.id) c.results.unmarkPending(from, p.id)

View File

@ -355,7 +355,6 @@ func (p *sequenceParser) generate(w io.Writer, done map[string]bool) error {
fprintf("var p%d = sequenceParser{", p.id) fprintf("var p%d = sequenceParser{", p.id)
fprintf("id: %d, commit: %d,", p.id, p.commit) fprintf("id: %d, commit: %d,", p.id, p.commit)
if p.commit&userDefined != 0 { if p.commit&userDefined != 0 {
fprintf("name: \"%s\",", p.name) fprintf("name: \"%s\",", p.name)
} }

View File

@ -372,11 +372,31 @@ func (s *Syntax) ReadSyntax(r io.Reader) error {
var sperr *self.ParseError var sperr *self.ParseError
if errors.As(err, &sperr) { if errors.As(err, &sperr) {
var perr ParseError var perr ParseError
tr := make([]TraceEntry, len(sperr.Trace))
for i := range sperr.Trace {
tr[i] = TraceEntry{
Level: sperr.Trace[i].Level,
Parser: sperr.Trace[i].Parser,
From: sperr.Trace[i].From,
To: sperr.Trace[i].To,
FromLine: sperr.Trace[i].FromLine,
FromCol: sperr.Trace[i].FromCol,
ToLine: sperr.Trace[i].ToLine,
ToCol: sperr.Trace[i].ToCol,
Event: TraceEvent(sperr.Trace[i].Event),
Reason: sperr.Trace[i].Reason,
}
}
perr.Input = sperr.Input perr.Input = sperr.Input
perr.Offset = sperr.Offset perr.Offset = sperr.Offset
perr.Line = sperr.Line perr.Line = sperr.Line
perr.Column = sperr.Column perr.Column = sperr.Column
perr.Definition = sperr.Definition perr.Definition = sperr.Definition
perr.Trace = tr
perr.UnexpectedInputLine = sperr.UnexpectedInputLine
perr.UnexpectedInputCol = sperr.UnexpectedInputCol
perr.inputContent = sperr.InputContent(0, -1)
return &perr return &perr
} }

View File

@ -62,8 +62,59 @@ type formatOptions struct {
targetWidth int targetWidth int
} }
// TraceEvent identifies the kind of step recorded by a trace entry.
type TraceEvent int
const (
// Enter marks a trace entry recorded when the parser of a definition is entered.
Enter TraceEvent = iota
// Success marks a trace entry recorded when the parser of a definition returns with success.
Success
// Fail marks a trace entry recorded when the parser of a definition returns with failure.
Fail
)
// TraceEntry represents a single trace event recorded during parsing. Trace entries help to identify
// problems with the input being parsed, or to debug a parser.
type TraceEntry struct {
// Level indicates the depth of descent while parsing. The root parser has a level of 0.
Level int
// Parser is the name of the parser definition that recorded the trace event.
Parser string
// From is the offset in the input where the parser that recorded the trace event started.
From int
// To is the offset in the input where the parser that recorded the trace event left off.
To int
// FromLine is the line number where the parser that recorded the trace event started.
FromLine int
// FromCol is the column number where the parser that recorded the trace event started.
FromCol int
// ToLine is the line number where the parser that recorded the trace event left off.
ToLine int
// ToCol is the column number where the parser that recorded the trace event left off.
ToCol int
// Event is the type of the trace event.
Event TraceEvent
// Reason describes why the parser that recorded the trace event failed.
Reason string
}
// ParseError reports a failure to match the input text against the defined syntax. // ParseError reports a failure to match the input text against the defined syntax.
type ParseError struct { type ParseError struct {
inputContent []rune
// Input denotes the name of the input source (e.g., filename), or "<input>" if unavailable. // Input denotes the name of the input source (e.g., filename), or "<input>" if unavailable.
Input string Input string
@ -81,6 +132,15 @@ type ParseError struct {
// Definition identifies the name of the specific parser definition where the match failed. // Definition identifies the name of the specific parser definition where the match failed.
Definition string Definition string
// Trace contains the last parsing steps leading to the parse error.
Trace []TraceEntry
// UnexpectedInputLine has a non-negative value, if the parse failed due to unexpected input.
UnexpectedInputLine int
// UnexpectedInputCol has a non-negative value, if the parse failed due to unexpected input.
UnexpectedInputCol int
} }
type parser interface { type parser interface {
@ -99,14 +159,43 @@ type builder interface {
// ErrInvalidUnicodeCharacter indicates that the input content contains invalid UTF-8 sequences. // ErrInvalidUnicodeCharacter indicates that the input content contains invalid UTF-8 sequences.
var ErrInvalidUnicodeCharacter = errors.New("invalid unicode character") var ErrInvalidUnicodeCharacter = errors.New("invalid unicode character")
// InputContent returns a copy of the consumed parse input between the offsets from (inclusive) and to
// (exclusive). If to < 0, or to is beyond the available content, the length of the available content is
// used instead. A negative from is treated as 0. When the resulting range is empty, it returns nil.
func (pe *ParseError) InputContent(from, to int) []rune {
	// Clamp both bounds to the available content, so that out-of-range arguments can neither panic
	// nor produce zero-padded output.
	if to < 0 || to > len(pe.inputContent) {
		to = len(pe.inputContent)
	}

	if from < 0 {
		from = 0
	}

	if to <= from {
		return nil
	}

	// Copy, so that callers cannot modify the content held by the error.
	c := make([]rune, to-from)
	copy(c, pe.inputContent[from:to])
	return c
}
// Error returns the formatted failure message. // Error returns the formatted failure message.
func (pe *ParseError) Error() string { func (pe *ParseError) Error() string {
if pe.UnexpectedInputLine >= 0 && pe.UnexpectedInputCol >= 0 {
return fmt.Sprintf( return fmt.Sprintf(
"%s:%d:%d:parse failed, parsing: %s", "%s:%d:%d:parse failed, unexpected input at %d:%d",
pe.Input,
pe.UnexpectedInputLine+1,
pe.UnexpectedInputCol+1,
pe.UnexpectedInputLine+1,
pe.UnexpectedInputCol+1,
)
}
return fmt.Sprintf(
"%s:%d:%d:parse failed, parsing: %s, at %d:%d",
pe.Input, pe.Input,
pe.Line+1, pe.Line+1,
pe.Column+1, pe.Column+1,
pe.Definition, pe.Definition,
pe.Line+1,
pe.Column+1,
) )
} }

147
trace.go Normal file
View File

@ -0,0 +1,147 @@
package treerack
import (
"errors"
"fmt"
"io"
"strings"
)
const (
	// maxTraceLineWidth is the target width of a line written by Trace. The width left after the event
	// description, the separator and the quotes is passed to traceEventSnippet as the snippet length.
	maxTraceLineWidth = 108
	// minEventSnippetWidth is the lower bound of the snippet length: traceEventSnippet raises any smaller
	// requested length to this value, so a written line can end up wider than maxTraceLineWidth.
	minEventSnippetWidth = 36
)
// traceLevelOffset returns the smallest Level found among the trace entries. It returns -1 when the trace
// is empty.
func traceLevelOffset(tr []TraceEntry) int {
	offset := -1
	for i := range tr {
		level := tr[i].Level
		if offset >= 0 && level >= offset {
			continue
		}

		offset = level
	}

	return offset
}
// traceIndent returns the indentation of a trace entry: one space for every level that the entry is nested
// below levelOffset. Entries at or above levelOffset are not indented.
func traceIndent(tr TraceEntry, levelOffset int) string {
	depth := tr.Level - levelOffset

	// strings.Repeat panics on a negative count, so shallow entries are handled explicitly.
	if depth <= 0 {
		return ""
	}

	return strings.Repeat(" ", depth)
}
// traceEventSymbol returns the one character marker of a trace entry: ">" for Enter, "<" for Success, "!" for
// Fail and "?" for any other event.
func traceEventSymbol(tr TraceEntry) string {
	if tr.Event == Enter {
		return ">"
	}

	if tr.Event == Success {
		return "<"
	}

	if tr.Event == Fail {
		return "!"
	}

	return "?"
}
// traceEnterMessage describes an Enter event: the parser and the position where it started, printed as
// one-based line:column.
func traceEnterMessage(tr TraceEntry) string {
	line, col := tr.FromLine+1, tr.FromCol+1
	return fmt.Sprintf("parsing %s at %d:%d", tr.Parser, line, col)
}
// traceSuccessMessage describes a Success event: the parser and the range that it matched, both ends printed
// as one-based line:column.
func traceSuccessMessage(tr TraceEntry) string {
	const format = "%s success from %d:%d to %d:%d"

	fromLine, fromCol := tr.FromLine+1, tr.FromCol+1
	toLine, toCol := tr.ToLine+1, tr.ToCol+1
	return fmt.Sprintf(format, tr.Parser, fromLine, fromCol, toLine, toCol)
}
// traceFailMessage describes a Fail event: the parser, the position where it started and the position where
// it gave up, printed as one-based line:column. When the entry has a Reason, it is appended after a comma.
func traceFailMessage(tr TraceEntry) string {
	msg := fmt.Sprintf(
		"%s failed from %d:%d at %d:%d",
		tr.Parser,
		tr.FromLine+1, tr.FromCol+1,
		tr.ToLine+1, tr.ToCol+1,
	)

	if tr.Reason != "" {
		msg += ", " + tr.Reason
	}

	return msg
}
// traceEventMessage returns the description of a trace entry, using the message format that belongs to its
// event type. Entries with an unknown event type are described as "?".
func traceEventMessage(tr TraceEntry) string {
	var describe func(TraceEntry) string
	switch tr.Event {
	case Enter:
		describe = traceEnterMessage
	case Success:
		describe = traceSuccessMessage
	case Fail:
		describe = traceFailMessage
	}

	if describe == nil {
		return "?"
	}

	return describe(tr)
}
// traceEventLine assembles the description of a trace entry: its indentation, directly followed by the event
// symbol, then a space and the event message.
func traceEventLine(tr TraceEntry, levelOffset int) string {
	return traceIndent(tr, levelOffset) + traceEventSymbol(tr) + " " + traceEventMessage(tr)
}
// traceEventSnippet returns the part of the input that leads up to the end position of a trace entry. The
// snippet is limited to the last line of that input and to maxLength characters, where maxLength is raised
// to minEventSnippetWidth if it is smaller. A snippet that does not start at the beginning of its line, or
// that fills the whole width, is prefixed with "...".
func traceEventSnippet(pe *ParseError, tr TraceEntry, maxLength int) string {
	if maxLength < minEventSnippetWidth {
		maxLength = minEventSnippetWidth
	}

	// For entering and failing parsers the character at the end position is shown as well, when there is
	// one. The length is read directly from the stored input, to avoid copying the whole input for every
	// trace entry.
	end := tr.To
	if (tr.Event == Enter || tr.Event == Fail) && end < len(pe.inputContent) {
		end++
	}

	start := end - maxLength
	if start < 0 {
		start = 0
	}

	// Only the last line of the window is shown.
	text := string(pe.InputContent(start, end))
	lastBreak := strings.LastIndexByte(text, '\n')
	if lastBreak >= 0 {
		text = text[lastBreak+1:]
	}

	line := []rune(text)

	// The snippet begins at the start of its line when the window contained a line break or when it began
	// at the start of the input.
	fromLineStart := lastBreak >= 0 || start == 0
	if fromLineStart && len(line) < maxLength {
		return string(line)
	}

	// Make room for the ellipsis when the line fills the available width.
	if overflow := len(line) + 3 - maxLength; overflow > 0 {
		line = line[overflow:]
	}

	return "..." + string(line)
}
// Trace writes the parsing trace attached to a parse error to the provided output, one line for each trace
// entry: the indented event description, followed by a quoted snippet of the input leading up to the event.
//
// When err is not, and does not wrap, a *ParseError, or when the error has no trace attached, Trace writes
// nothing and returns nil. When writing to the output fails, the write error is returned.
//
// Note that it is possible and encouraged to generate custom visualizations of the parsing trace from the
// structured trace entries attached to errors of type *ParseError, instead of using the Trace function.
func Trace(out io.Writer, err error) error {
	var parseErr *ParseError
	if ok := errors.As(err, &parseErr); !ok {
		return nil
	}

	offset := traceLevelOffset(parseErr.Trace)
	for _, entry := range parseErr.Trace {
		line := traceEventLine(entry, offset)

		// The separator and the quotes around the snippet take up four characters of the line.
		snippetWidth := maxTraceLineWidth - len(line) - 4
		snippet := traceEventSnippet(parseErr, entry, snippetWidth)

		_, writeErr := fmt.Fprintf(out, "%s: '%s'\n", line, snippet)
		if writeErr != nil {
			return writeErr
		}
	}

	return nil
}

View File

@ -131,12 +131,11 @@ func applyWhitespaceToDefs(defs []definition) []definition {
func applyWhitespaceToRoot(root definition) (definition, definition) { func applyWhitespaceToRoot(root definition) (definition, definition) {
original, name := root, root.nodeName() original, name := root, root.nodeName()
wsName := patchName(name, "wsroot") wsName := patchName(name, "wsroot")
original.setName(wsName) original.setName(wsName)
original.setCommitType(original.commitType() &^ Root) original.setCommitType(original.commitType() &^ Root)
original.setCommitType(original.commitType() | Alias) original.setCommitType(original.commitType() &^ userDefined)
original.setCommitType(original.commitType() | Alias | FailPass)
root = newSequence(name, Root, []SequenceItem{{ root = newSequence(name, Root|userDefined, []SequenceItem{{
Name: whitespaceName, Name: whitespaceName,
Min: 0, Min: 0,
Max: -1, Max: -1,