Report parse failures as a structured ParseError.

The char, choice and sequence parsers pass themselves to context.fail, so the
context can remember the right-most consumed offset and the parser that failed
there. finalizeParse turns that state into a *ParseError carrying the offset,
the zero-based line and column, and the definition name, in place of the
ErrInvalidInput and ErrUnexpectedCharacter sentinels on these paths.
Definitions created through define.go and the exported Syntax methods are
tagged with the new userDefined commit flag.

Review notes:
 * removed the println debugging from context.fail and context.parseError,
   and dropped the open_test.go hunk whose only change was println("starting");
 * findLine clamps offset to the tokens read so far and returns explicitly;
 * added doc comments to fail, findLine, parseError and ParseError.Error;
 * line breaks and indentation were restored from a whitespace-collapsed copy
   of the patch, and the post-image blob ids on the context.go and syntax.go
   index lines predate the edits above.

diff --git a/char.go b/char.go
index e0158be..2f74da6 100644
--- a/char.go
+++ b/char.go
@@ -58,7 +58,7 @@ func (p *charParser) match(t rune) bool {
 
 func (p *charParser) parse(c *context) {
 	if tok, ok := c.token(); !ok || !p.match(tok) {
-		c.fail(c.offset)
+		c.fail(p, c.offset)
 		return
 	}
 
diff --git a/choice.go b/choice.go
index d195414..bb13d98 100644
--- a/choice.go
+++ b/choice.go
@@ -131,16 +131,17 @@ func (d *choiceDefinition) parser() parser {
 
 func (d *choiceDefinition) builder() builder { return d.cbuilder }
 
-func (p *choiceParser) nodeName() string { return p.name }
-func (p *choiceParser) nodeID() int      { return p.id }
+func (p *choiceParser) nodeName() string       { return p.name }
+func (p *choiceParser) nodeID() int            { return p.id }
+func (p *choiceParser) commitType() CommitType { return p.commit }
 
 func (p *choiceParser) parse(c *context) {
-	if c.fromResults(p.id) {
+	if c.fromResults(p) {
 		return
 	}
 
 	if c.results.pending(c.offset, p.id) {
-		c.fail(c.offset)
+		c.fail(p, c.offset)
 		return
 	}
 
@@ -185,7 +186,7 @@ func (p *choiceParser) parse(c *context) {
 	}
 
 	c.results.setNoMatch(from, p.id)
-	c.fail(from)
+	c.fail(p, from)
 	c.results.unmarkPending(from, p.id)
 }
 
diff --git a/context.go b/context.go
index ff9bbf5..fda3289 100644
--- a/context.go
+++ b/context.go
@@ -6,14 +6,17 @@ import (
 )
 
 type context struct {
-	reader     io.RuneReader
-	offset     int
-	readOffset int
-	readErr    error
-	eof        bool
-	results    *results
-	tokens     []rune
-	matchLast  bool
+	reader        io.RuneReader
+	offset        int
+	readOffset    int
+	consumed      int
+	failOffset    int
+	failingParser parser
+	readErr       error
+	eof           bool
+	results       *results
+	tokens        []rune
+	matchLast     bool
 }
 
 func newContext(r io.RuneReader) *context {
@@ -62,8 +65,8 @@ func (c *context) token() (rune, bool) {
 	return c.tokens[c.offset], true
 }
 
-func (c *context) fromResults(id int) bool {
-	to, m, ok := c.results.longestResult(c.offset, id)
+func (c *context) fromResults(p parser) bool {
+	to, m, ok := c.results.longestResult(c.offset, p.nodeID())
 	if !ok {
 		return false
 	}
@@ -71,7 +74,7 @@ func (c *context) fromResults(id int) bool {
 	if m {
 		c.success(to)
 	} else {
-		c.fail(c.offset)
+		c.fail(p, c.offset)
 	}
 
 	return true
@@ -80,21 +83,79 @@ func (c *context) fromResults(id int) bool {
 func (c *context) success(to int) {
 	c.offset = to
 	c.matchLast = true
+	// Keep track of the right-most offset matched so far.
+	if to > c.consumed {
+		c.consumed = to
+	}
 }
 
-func (c *context) fail(offset int) {
+// fail moves the context back to offset and flags the last parse attempt as
+// unmatched. When no failing parser is recorded yet, or input was consumed
+// beyond the recorded fail offset, the fail offset moves to the right-most
+// consumed offset, and p is recorded as the failing parser if its commit type
+// has the userDefined flag.
+func (c *context) fail(p parser, offset int) {
 	c.offset = offset
 	c.matchLast = false
+	if c.failingParser == nil || c.consumed > c.failOffset {
+		// TODO: choice can be retried
+		c.failOffset = c.consumed
+		if p.commitType()&userDefined != 0 {
+			c.failingParser = p
+		}
+	}
 }
 
-func (c *context) finalizeParse(rootID int) error {
-	if !c.matchLast {
-		return ErrInvalidInput
+// findLine returns the zero-based line and column index of the token at
+// offset, treating every '\n' before it as the end of a line.
+func findLine(tokens []rune, offset int) (line, column int) {
+	// Never slice beyond the tokens read so far.
+	if offset > len(tokens) {
+		offset = len(tokens)
+	}
+
+	for _, t := range tokens[:offset] {
+		column++
+		if t == '\n' {
+			column = 0
+			line++
+		}
 	}
 
-	to, match, found := c.results.longestResult(0, rootID)
+	return line, column
+}
+
+// parseError builds the ParseError for the current state of the context. When
+// no failing parser was recorded, it reports the root definition at the
+// right-most consumed offset.
+func (c *context) parseError(root parser) error {
+	definition := root.nodeName()
+	if c.failingParser == nil {
+		c.failOffset = c.consumed
+	} else {
+		definition = c.failingParser.nodeName()
+	}
+
+	line, col := findLine(c.tokens, c.failOffset)
+
+	return &ParseError{
+		Offset:     c.failOffset,
+		Line:       line,
+		Column:     col,
+		Definition: definition,
+	}
+}
+
+func (c *context) finalizeParse(root parser) error {
+	if !c.matchLast {
+		return c.parseError(root)
+	}
+
+	to, match, found := c.results.longestResult(0, root.nodeID())
+
+	// TODO: test all three cases
 	if !found || !match || to < c.readOffset {
-		return ErrUnexpectedCharacter
+		return c.parseError(root)
 	}
 
 	if !c.eof {
@@ -104,7 +165,7 @@ func (c *context) finalizeParse(rootID int) error {
 				return c.readErr
 			}
 
-			return ErrUnexpectedCharacter
+			return c.parseError(root)
 		}
 	}
 
diff --git a/define.go b/define.go
index 2b1e7c5..6823358 100644
--- a/define.go
+++ b/define.go
@@ -191,7 +191,7 @@ func defineDefinition(s *Syntax, n *Node) error {
 	return defineExpression(
 		s,
 		n.Nodes[0].Text(),
-		flagsToCommitType(n.Nodes[1:len(n.Nodes)-1]),
+		flagsToCommitType(n.Nodes[1:len(n.Nodes)-1])|userDefined,
 		n.Nodes[len(n.Nodes)-1],
 	)
 }
diff --git a/errors_test.go b/errors_test.go
new file mode 100644
index 0000000..f050921
--- /dev/null
+++ b/errors_test.go
@@ -0,0 +1,115 @@
+package treerack
+
+import (
+	"bytes"
+	"testing"
+)
+
+func TestError(t *testing.T) {
+	type testItem struct {
+		title      string
+		syntax     string
+		text       string
+		offset     int
+		line       int
+		column     int
+		definition string
+	}
+
+	for _, test := range []testItem{{
+		title:      "single def, empty text",
+		syntax:     `a = "a"`,
+		definition: "a",
+	}, {
+		title:      "single def, wrong text",
+		syntax:     `a = "a"`,
+		text:       "b",
+		definition: "a",
+	}, {
+		title:      "single optional def, wrong text",
+		syntax:     `a = "a"?`,
+		text:       "b",
+		definition: "a",
+	}, {
+		title:      "error on second line, second column",
+		syntax:     `a = [a\n]*`,
+		text:       "aa\nabaa\naa",
+		offset:     4,
+		line:       1,
+		column:     1,
+		definition: "a",
+	}, {
+		title:      "multiple definitions",
+		syntax:     `a = "aa"; A:root = a`,
+		text:       "ab",
+		offset:     1,
+		column:     1,
+		definition: "a",
+	}, {
+		title:      "choice, options succeed",
+		syntax:     `a = "12"; b = "1"; c:root = a | b`,
+		text:       "123",
+		offset:     2,
+		column:     2,
+		definition: "c",
+	}, {
+		title:      "choice, longer option fails",
+		syntax:     `a = "12"; b = "1"; c:root = a | b`,
+		text:       "13",
+		offset:     1,
+		column:     1,
+		definition: "a",
+	}, {
+		title:      "choice, shorter option fails",
+		syntax:     `a = "2"; b = "12"; c:root = a | b`,
+		text:       "123",
+		offset:     0,
+		column:     0,
+		definition: "1",
+	}, {
+		title:      "choice, both options fail",
+		syntax:     `a = "12"; b = "2"; c:root = a | b`,
+		text:       "13",
+		offset:     1,
+		column:     1,
+		definition: "a",
+	}} {
+		t.Run(test.title, func(t *testing.T) {
+			s, err := openSyntaxString(test.syntax)
+			if err != nil {
+				t.Error(err)
+				return
+			}
+
+			_, err = s.Parse(bytes.NewBufferString(test.text))
+			if err == nil {
+				t.Error("failed to fail")
+				return
+			}
+
+			perr, ok := err.(*ParseError)
+			if !ok {
+				t.Error("invalid error returned", err)
+				return
+			}
+
+			if perr.Offset != test.offset {
+				t.Error("invalid error offset", perr.Offset, test.offset)
+				return
+			}
+
+			if perr.Line != test.line {
+				t.Error("invalid line index", perr.Line, test.line)
+				return
+			}
+
+			if perr.Column != test.column {
+				t.Error("invalid column index", perr.Column, test.column)
+			}
+
+			if perr.Definition != test.definition {
+				t.Error("invalid definition", perr.Definition, test.definition)
+			}
+		})
+	}
+}
diff --git a/sequence.go b/sequence.go
index 8287a52..2bcfdf8 100644
--- a/sequence.go
+++ b/sequence.go
@@ -168,13 +168,14 @@ func (d *sequenceDefinition) parser() parser {
 
 func (d *sequenceDefinition) builder() builder { return d.sbuilder }
 
-func (p *sequenceParser) nodeName() string { return p.name }
-func (p *sequenceParser) nodeID() int      { return p.id }
+func (p *sequenceParser) nodeName() string       { return p.name }
+func (p *sequenceParser) nodeID() int            { return p.id }
+func (p *sequenceParser) commitType() CommitType { return p.commit }
 
 func (p *sequenceParser) parse(c *context) {
 	if !p.allChars {
 		if c.results.pending(c.offset, p.id) {
-			c.fail(c.offset)
+			c.fail(p, c.offset)
 			return
 		}
 
@@ -191,7 +192,7 @@ func (p *sequenceParser) parse(c *context) {
 		p.items[itemIndex].parse(c)
 		if !c.matchLast {
 			if currentCount < p.ranges[itemIndex][0] {
-				c.fail(from)
+				c.fail(p, from)
 				if !p.allChars {
 					c.results.unmarkPending(from, p.id)
 				}
diff --git a/syntax.go b/syntax.go
index c7d933d..dfbaa62 100644
--- a/syntax.go
+++ b/syntax.go
@@ -15,6 +15,8 @@ const (
 	Whitespace
 	NoWhitespace
 	Root
+
+	userDefined
 )
 
 // if min=0&&max=0, it means min=1,max=1
@@ -25,6 +27,29 @@ type SequenceItem struct {
 	Min, Max int
 }
 
+// ParseError is returned when the input text doesn't match
+// the used syntax during parsing.
+type ParseError struct {
+
+	// Offset is the index of the right-most failing
+	// token in the input text.
+	Offset int
+
+	// Line tells the line index of the right-most failing
+	// token in the input text.
+	//
+	// It is zero-based, and for error reporting, it is
+	// recommended to increment it by one.
+	Line int
+
+	// Column tells the column index of the right-most failing
+	// token in the input text.
+	Column int
+
+	// Definition tells the right-most unmatched parser definition.
+	Definition string
+}
+
 type Syntax struct {
 	registry    *registry
 	initialized bool
@@ -53,6 +78,7 @@ type definition interface {
 type parser interface {
 	nodeName() string
 	nodeID() int
+	commitType() CommitType
 	parse(*context)
 }
 
@@ -117,6 +143,11 @@ func intsContain(is []int, i int) bool {
 	return false
 }
 
+// Error implements the error interface.
+func (pe *ParseError) Error() string {
+	return "parse error"
+}
+
 func (s *Syntax) applyRoot(d definition) error {
 	explicitRoot := d.commitType()&Root != 0
 	if explicitRoot && s.explicitRoot {
@@ -164,7 +195,7 @@ func (s *Syntax) AnyChar(name string, ct CommitType) error {
 		return ErrInvalidSymbolName
 	}
 
-	return s.anyChar(name, ct)
+	return s.anyChar(name, ct|userDefined)
 }
 
 func childName(name string, childIndex int) string {
@@ -194,7 +225,7 @@ func (s *Syntax) Class(name string, ct CommitType, not bool, chars []rune, range
 		return ErrInvalidSymbolName
 	}
 
-	return s.class(name, ct, not, chars, ranges)
+	return s.class(name, ct|userDefined, not, chars, ranges)
 }
 
 func (s *Syntax) charSequence(name string, ct CommitType, chars []rune) error {
@@ -215,7 +246,7 @@ func (s *Syntax) CharSequence(name string, ct CommitType, chars []rune) error {
 		return ErrInvalidSymbolName
 	}
 
-	return s.charSequence(name, ct, chars)
+	return s.charSequence(name, ct|userDefined, chars)
 }
 
 func (s *Syntax) sequence(name string, ct CommitType, items ...SequenceItem) error {
@@ -229,7 +260,7 @@ func (s *Syntax) Sequence(name string, ct CommitType, items ...SequenceItem) err
 		return ErrInvalidSymbolName
 	}
 
-	return s.sequence(name, ct, items...)
+	return s.sequence(name, ct|userDefined, items...)
 }
 
 func (s *Syntax) choice(name string, ct CommitType, options ...string) error {
@@ -241,7 +272,7 @@ func (s *Syntax) Choice(name string, ct CommitType, options ...string) error {
 		return ErrInvalidSymbolName
 	}
 
-	return s.choice(name, ct, options...)
+	return s.choice(name, ct|userDefined, options...)
 }
 
 func (s *Syntax) Read(r io.Reader) error {
@@ -315,7 +346,7 @@ func (s *Syntax) Parse(r io.Reader) (*Node, error) {
 		return nil, c.readErr
 	}
 
-	if err := c.finalizeParse(s.parser.nodeID()); err != nil {
+	if err := c.finalizeParse(s.parser); err != nil {
 		return nil, err
 	}
 