From 32b51949b764affb7c498a6785cff87549f3b7cd Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Fri, 29 Dec 2017 14:59:13 +0100 Subject: [PATCH] fix error reporting in recursive definitions --- char.go | 1 - choice.go | 31 +++-- context.go | 6 - errors_test.go | 304 +++++++++++++++++++++++++++++++------------------ notes.txt | 3 + sequence.go | 12 +- syntax.go | 2 +- 7 files changed, 223 insertions(+), 136 deletions(-) diff --git a/char.go b/char.go index 57ac6c7..ce8a6eb 100644 --- a/char.go +++ b/char.go @@ -97,7 +97,6 @@ func (p *charParser) parse(c *context) { if tok, ok := c.token(); !ok || !p.match(tok) { if c.offset > c.failOffset { c.failOffset = c.offset - // println("clearing failing parser") c.failingParser = nil } diff --git a/choice.go b/choice.go index 694c433..477493c 100644 --- a/choice.go +++ b/choice.go @@ -163,8 +163,6 @@ func (p *choiceParser) parse(c *context) { return } - // println("parsing choice", p.name, c.offset) - c.results.markPending(c.offset, p.id) from := c.offset to := c.offset @@ -173,10 +171,10 @@ func (p *choiceParser) parse(c *context) { var optionIndex int var foundMatch bool - // TODO: - // - if there is a failure already, it should be left alone - // - what if reading more means that the previous failurs don't count initialFailOffset := c.failOffset + initialFailingParser := c.failingParser + failOffset := initialFailOffset + var failingParser parser for { foundMatch = false @@ -186,6 +184,13 @@ func (p *choiceParser) parse(c *context) { p.options[optionIndex].parse(c) optionIndex++ + if !c.matchLast { + if c.failOffset > failOffset { + failOffset = c.failOffset + failingParser = c.failingParser + } + } + if !c.matchLast || match && c.offset <= to { c.offset = from continue @@ -204,9 +209,12 @@ func (p *choiceParser) parse(c *context) { } if match { - if to >= c.failOffset { + if to > initialFailOffset { c.failOffset = -1 c.failingParser = nil + } else { + c.failOffset = initialFailOffset + c.failingParser = initialFailingParser } c.success(to) @@ -214,11 +222,12 @@ func (p *choiceParser) parse(c *context) { return } - // TODO: - // - what if all of it pending? - if c.failOffset > initialFailOffset && c.failingParser == nil { - if p.commitType()&userDefined != 0 && p.commitType()&Whitespace == 0 { - // println("recording choice failure", p.name, from, c.failOffset) + if failOffset > initialFailOffset { + c.failOffset = failOffset + c.failingParser = failingParser + if c.failingParser == nil && + p.commitType()&userDefined != 0 && + p.commitType()&Whitespace == 0 { c.failingParser = p } } diff --git a/context.go b/context.go index bc7cfc7..5a65898 100644 --- a/context.go +++ b/context.go @@ -109,7 +109,6 @@ func (c *context) recordFailure(offset int, p parser) { c.failOffset = offset if p.commitType()&userDefined != 0 && p.commitType()&Whitespace == 0 { - // println("setting failing sequence parser", p.nodeName(), offset) c.failingParser = p } } @@ -146,20 +145,15 @@ func (c *context) parseError(p parser) error { func (c *context) finalizeParse(root parser) error { p := c.failingParser if p == nil { - // println("failing parser is nil") p = root } - // println("failing parser is", p.nodeName()) - to, match, found := c.results.longestResult(0, root.nodeID()) if found && match && to < c.readOffset { - // println("forcing root", found, match, to, c.readOffset) return c.parseError(p) } - // TODO: test both cases if !found || !match { return c.parseError(p) } diff --git a/errors_test.go b/errors_test.go index 1ba8e76..d6ffed8 100644 --- a/errors_test.go +++ b/errors_test.go @@ -2,105 +2,135 @@ package treerack import ( "bytes" + "reflect" "testing" ) +func checkParseError(left, right ParseError) bool { + left.registry = nil + right.registry = nil + return reflect.DeepEqual(left, right) +} + func TestError(t *testing.T) { type testItem struct { - title string - syntax string - text string - offset int - line int - column int - definition string + title string + syntax string + text string + perr ParseError } for _, test := range []testItem{{ - title: "single def, empty text", - syntax: `a = "a"`, - definition: "a", + title: "single def, empty text", + syntax: `a = "a"`, + perr: ParseError{ + Definition: "a", + }, }, { - title: "single def, wrong text", - syntax: `a = "a"`, - text: "b", - definition: "a", + title: "single def, wrong text", + syntax: `a = "a"`, + text: "b", + perr: ParseError{ + Definition: "a", + }, }, { - title: "single optional def, wrong text", - syntax: `a = "a"?`, - text: "b", - definition: "a", + title: "single optional def, wrong text", + syntax: `a = "a"?`, + text: "b", + perr: ParseError{ + Definition: "a", + }, }, { - title: "error on second line, second column", - syntax: `a = [a\n]*`, - text: "aa\nabaa\naa", - offset: 4, - line: 1, - column: 1, - definition: "a", + title: "error on second line, second column", + syntax: `a = [a\n]*`, + text: "aa\nabaa\naa", + perr: ParseError{ + Offset: 4, + Line: 1, + Column: 1, + Definition: "a", + }, }, { - title: "multiple definitions", - syntax: `a = "aa"; A:root = a`, - text: "ab", - offset: 1, - column: 1, - definition: "a", + title: "multiple definitions", + syntax: `a = "aa"; A:root = a`, + text: "ab", + perr: ParseError{ + Offset: 1, + Column: 1, + Definition: "a", + }, }, { - title: "choice, options succeed", - syntax: `a = "12"; b = "1"; c:root = a | b`, - text: "123", - offset: 2, - column: 2, - definition: "c", + title: "choice, options succeed", + syntax: `a = "12"; b = "1"; c:root = a | b`, + text: "123", + perr: ParseError{ + Offset: 2, + Column: 2, + Definition: "c", + }, }, { - title: "choice succeeds, document fails", - syntax: `a = "12"; b = "1"; c:root = a | b`, - text: "13", - offset: 1, - column: 1, - definition: "c", + title: "choice succeeds, document fails", + syntax: `a = "12"; b = "1"; c:root = a | b`, + text: "13", + perr: ParseError{ + Offset: 1, + Column: 1, + Definition: "c", + }, }, { - title: "choice fails", - syntax: `a = "12"; b = "2"; c:root = a | b`, - text: "13", - offset: 1, - column: 1, - definition: "a", + title: "choice fails", + syntax: `a = "12"; b = "2"; c:root = a | b`, + text: "13", + perr: ParseError{ + Offset: 1, + Column: 1, + Definition: "a", + }, }, { - title: "choice fails, longer option reported", - syntax: `a = "12"; b = "134"; c:root = a | b`, - text: "135", - offset: 2, - column: 2, - definition: "b", + title: "choice fails, longer option reported", + syntax: `a = "12"; b = "134"; c:root = a | b`, + text: "135", + perr: ParseError{ + Offset: 2, + Column: 2, + Definition: "b", + }, }, { - title: "failing choice on the failing branch", - syntax: `a = "123"; b:root = a | "13"`, - text: "124", - offset: 2, - column: 2, - definition: "a", + title: "failing choice on the failing branch", + syntax: `a = "123"; b:root = a | "13"`, + text: "124", + perr: ParseError{ + Offset: 2, + Column: 2, + Definition: "a", + }, }, { - title: "failing choice on a shorter branch", - syntax: `a = "13"; b:root = "123" | a`, - text: "124", - offset: 2, - column: 2, - definition: "b", + title: "failing choice on a shorter branch", + syntax: `a = "13"; b:root = "123" | a`, + text: "124", + perr: ParseError{ + Offset: 2, + Column: 2, + Definition: "b", + }, }, { - title: "longer failure on a later pass", - syntax: `a = "12"; b = "34"; c = "1" b; d:root = a | c`, - text: "135", - offset: 2, - column: 2, - definition: "b", + title: "longer failure on a later pass", + syntax: `a = "12"; b = "34"; c = "1" b; d:root = a | c`, + text: "135", + perr: ParseError{ + Offset: 2, + Column: 2, + Definition: "b", + }, }, { - title: "char as a choice option", - syntax: `a = "12"; b = [a] | [b]; c = a b`, - text: "12c", - offset: 2, - column: 2, - definition: "b", + title: "char as a choice option", + syntax: `a = "12"; b = [a] | [b]; c = a b`, + text: "12c", + perr: ParseError{ + Offset: 2, + Column: 2, + Definition: "b", + }, }} { t.Run(test.title, func(t *testing.T) { s, err := openSyntaxString(test.syntax) @@ -109,7 +139,6 @@ func TestError(t *testing.T) { return } - // println("starting") _, err = s.Parse(bytes.NewBufferString(test.text)) if err == nil { t.Error("failed to fail") @@ -127,50 +156,105 @@ func TestError(t *testing.T) { return } - if perr.Offset != test.offset { - t.Error("invalid error offset", perr.Offset, test.offset) - return - } - - if perr.Line != test.line { - t.Error("invalid line index", perr.Line, test.line) - return - } - - if perr.Column != test.column { - t.Error("invalid column index", perr.Column, test.column) - } - - if perr.Definition != test.definition { - t.Error("invalid definition", perr.Definition, test.definition) + perr.Input = "" + if !checkParseError(*perr, test.perr) { + t.Error("failed to return the right error") } }) } } func TestErrorRecursive(t *testing.T) { - // const doc = `a[b][1a]` - const doc = `a[1a]` + const syntax = ` + ws:ws = " "; + symbol = [a-z]+; + function-application = expression "(" expression? ")"; + expression = function-application | symbol; + doc:root = (expression (";" expression)*)+; + ` - s, err := openSyntaxFile("examples/mml.treerack") + s, err := openSyntaxString(syntax) if err != nil { - t.Error(err) - return + t.Fatal(err) } - // println("\n<<<< starting >>>>\n") - _, err = s.Parse(bytes.NewBufferString(doc)) - perr, ok := err.(*ParseError) - if !ok { - t.Error("failed to return parse error") - return - } + for _, test := range []struct { + title string + doc string + perr ParseError + }{{ + title: "simple, open", + doc: "a(", + perr: ParseError{ + Offset: 1, + Column: 1, + Definition: "function-application", + }, + }, { + title: "simple, close", + doc: "a)", + perr: ParseError{ + Offset: 1, + Column: 1, + Definition: "function-application", + }, + }, { + title: "inner, open", + doc: "a(b()", + perr: ParseError{ + Offset: 1, + Column: 1, + Definition: "function-application", + }, + }, { + title: "inner, close", + doc: "a(b))", + perr: ParseError{ + Offset: 4, + Column: 4, + Definition: "function-application", + }, + }, { + title: "outer, open", + doc: "a()b(", + perr: ParseError{ + Offset: 4, + Column: 4, + Definition: "function-application", + }, + }, { + title: "outer, close", + doc: "a()b)", + perr: ParseError{ + Offset: 4, + Column: 4, + Definition: "function-application", + }, + }} { + t.Run(test.title, func(t *testing.T) { + _, err := s.Parse(bytes.NewBufferString(test.doc)) + if err == nil { + t.Fatal("failed to fail") + } - t.Log(perr) + perr, ok := err.(*ParseError) + if !ok { + t.Fatal("invalid error type") + } + + perr.Input = "" + + if !checkParseError(*perr, test.perr) { + t.Error("failed to return the right error") + t.Log("got: ", *perr) + t.Log("expected:", test.perr) + } + }) + } } func TestErrorMessage(t *testing.T) { - const expected = "foo:4:10:failed to parse input, expecting: bar" + const expected = "foo:4:10:parse failed, parsing: bar" perr := &ParseError{ Input: "foo", diff --git a/notes.txt b/notes.txt index bb50ad2..c311461 100644 --- a/notes.txt +++ b/notes.txt @@ -29,6 +29,9 @@ verify choice and sequence preference formatter pretty +[optimization] +try preallocate larger store chunks + [problems] can the root be an alias? check the commit mechanism diff --git a/sequence.go b/sequence.go index e203acc..2f95495 100644 --- a/sequence.go +++ b/sequence.go @@ -307,17 +307,15 @@ func (p *sequenceParser) parse(c *context) { from := c.offset to := c.offset var parsed bool - initialFailOffset := c.failOffset for itemIndex < len(p.items) { p.items[itemIndex].parse(c) if !c.matchLast { if currentCount < p.ranges[itemIndex][0] { - if c.failOffset > initialFailOffset && c.failingParser == nil { - if p.commitType()&userDefined != 0 && p.commitType()&Whitespace == 0 { - // println("recording sequence failure", p.name, from, c.failOffset) - c.failingParser = p - } + if c.failingParser == nil && + p.commitType()&userDefined != 0 && + p.commitType()&Whitespace == 0 { + c.failingParser = p } c.fail(from) @@ -352,7 +350,7 @@ func (p *sequenceParser) parse(c *context) { } } - if to >= c.failOffset { + if to > c.failOffset { c.failOffset = -1 c.failingParser = nil } diff --git a/syntax.go b/syntax.go index ed0f622..93f82f3 100644 --- a/syntax.go +++ b/syntax.go @@ -159,7 +159,7 @@ func intsContain(is []int, i int) bool { func (pe *ParseError) Error() string { return fmt.Sprintf( - "%s:%d:%d:failed to parse input, expecting: %s", + "%s:%d:%d:parse failed, parsing: %s", pe.Input, pe.Line+1, pe.Column+1,