From 60172b2f9f3721362b943e8fbb3839b72463acd0 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Sat, 4 Nov 2017 22:08:15 +0100 Subject: [PATCH] fix bug with building empty items of sequences --- check_test.go | 21 ++--- choice.go | 32 +++++--- context.go | 60 ++++++++++++-- node.go | 10 ++- parse_test.go | 69 ++++++++++++---- results.go | 193 ++++++++++++++++++++------------------------- run_test.go | 41 +++++++--- sequence.go | 39 +++++++-- syntax.go | 2 + whitespace.go | 8 +- whitespace_test.go | 7 +- 11 files changed, 311 insertions(+), 171 deletions(-) diff --git a/check_test.go b/check_test.go index 443fd76..b09ad79 100644 --- a/check_test.go +++ b/check_test.go @@ -43,27 +43,28 @@ func checkNode(t *testing.T, ignorePosition bool, left, right *Node) { return } - if len(left.Nodes) != len(right.Nodes) { - t.Error("length doesn't match", left.Name, len(left.Nodes), len(right.Nodes)) + lnodes, rnodes := left.Nodes, right.Nodes + if len(lnodes) != len(rnodes) { + t.Error("length doesn't match", left.Name, len(lnodes), len(rnodes)) t.Log(left) t.Log(right) for { - if len(left.Nodes) > 0 { - t.Log("<", left.Nodes[0]) - left.Nodes = left.Nodes[1:] + if len(lnodes) > 0 { + t.Log("<", lnodes[0]) + lnodes = lnodes[1:] } - if len(right.Nodes) > 0 { - t.Log(">", right.Nodes[0]) - right.Nodes = right.Nodes[1:] + if len(rnodes) > 0 { + t.Log(">", rnodes[0]) + rnodes = rnodes[1:] } - if len(left.Nodes) == 0 && len(right.Nodes) == 0 { + if len(lnodes) == 0 && len(rnodes) == 0 { break } } return } - checkNodes(t, ignorePosition, left.Nodes, right.Nodes) + checkNodes(t, ignorePosition, lnodes, rnodes) } diff --git a/choice.go b/choice.go index c4e5e6c..33f6b4c 100644 --- a/choice.go +++ b/choice.go @@ -50,12 +50,12 @@ func (d *choiceDefinition) validate(r *registry) error { d.validated = true for i := range d.options { - e, ok := r.definitions[d.options[i]] + o, ok := r.definitions[d.options[i]] if !ok { return parserNotFound(d.options[i]) } - if err := e.validate(r); err != nil { + if err := o.validate(r); err != nil { return err } } @@ -76,8 +76,8 @@ func (d *choiceDefinition) createBuilder() { } func (d *choiceDefinition) initOptions(r *registry) { - for _, e := range d.options { - def := r.definitions[e] + for _, o := range d.options { + def := r.definitions[o] d.optionDefs = append(d.optionDefs, def) def.init(r) d.cbuilder.options = append(d.cbuilder.options, def.builder()) @@ -101,8 +101,8 @@ func (d *choiceDefinition) addGeneralization(g int) { } d.generalizations = append(d.generalizations, g) - for _, e := range d.optionDefs { - e.addGeneralization(g) + for _, o := range d.optionDefs { + o.addGeneralization(g) } } @@ -201,15 +201,25 @@ func (b *choiceBuilder) nodeName() string { return b.name } func (b *choiceBuilder) nodeID() int { return b.id } func (b *choiceBuilder) build(c *context) ([]*Node, bool) { - to, ok := c.results.takeMatch(c.offset, b.id) + to, ok := c.results.longestMatch(c.offset, b.id) if !ok { return nil, false } + if c.buildPending(c.offset, b.id, to) { + return nil, false + } + + c.markBuildPending(c.offset, b.id, to) + + if to-c.offset > 0 { + c.results.dropMatchTo(c.offset, b.id, to) + } + var option builder - for _, e := range b.options { - if c.results.hasMatchTo(c.offset, e.nodeID(), to) { - option = e + for _, o := range b.options { + if c.results.hasMatchTo(c.offset, o.nodeID(), to) { + option = o break } } @@ -225,6 +235,8 @@ func (b *choiceBuilder) build(c *context) ([]*Node, bool) { panic("damaged parse result") } + c.unmarkBuildPending(from, b.id, to) + if b.commit&Alias != 0 { return n, true } diff --git a/context.go b/context.go index 2658727..9450087 100644 --- a/context.go +++ b/context.go @@ -63,7 +63,7 @@ func (c *context) token() (rune, bool) { return c.tokens[c.offset], true } -func (c *context) pending(offset int, id int) bool { +func (c *context) pending(offset, id int) bool { if len(c.isPending) <= id { return false } @@ -77,7 +77,7 @@ func (c *context) pending(offset int, id int) bool { return false } -func (c *context) markPending(offset int, id int) { +func (c *context) markPending(offset, id int) { if len(c.isPending) <= id { if cap(c.isPending) > id { c.isPending = c.isPending[:id+1] @@ -99,7 +99,7 @@ func (c *context) markPending(offset int, id int) { c.isPending[id] = append(c.isPending[id], offset) } -func (c *context) unmarkPending(offset int, id int) { +func (c *context) unmarkPending(offset, id int) { for i := range c.isPending[id] { if c.isPending[id][i] == offset { c.isPending[id][i] = -1 @@ -108,8 +108,58 @@ func (c *context) unmarkPending(offset int, id int) { } } +func (c *context) resetPending() { + c.isPending = nil +} + +func (c *context) buildPending(offset, id, to int) bool { + if len(c.isPending) <= id { + return false + } + + for i := 0; i < len(c.isPending[id]); i += 2 { + if c.isPending[id][i] == offset && c.isPending[id][i+1] == to { + return true + } + } + + return false +} + +func (c *context) markBuildPending(offset, id, to int) { + if len(c.isPending) <= id { + if cap(c.isPending) > id { + c.isPending = c.isPending[:id+1] + } else { + c.isPending = c.isPending[:cap(c.isPending)] + for i := cap(c.isPending); i <= id; i++ { + c.isPending = append(c.isPending, nil) + } + } + } + + for i := 0; i < len(c.isPending[id]); i += 2 { + if c.isPending[id][i] == -1 { + c.isPending[id][i] = offset + c.isPending[id][i+1] = to + return + } + } + + c.isPending[id] = append(c.isPending[id], offset, to) +} + +func (c *context) unmarkBuildPending(offset, id, to int) { + for i := 0; i < len(c.isPending[id]); i += 2 { + if c.isPending[id][i] == offset && c.isPending[id][i+1] == to { + c.isPending[id][i] = -1 + break + } + } +} + func (c *context) fromResults(id int) bool { - to, m, ok := c.results.getMatch(c.offset, id) + to, m, ok := c.results.longestResult(c.offset, id) if !ok { return false } @@ -138,7 +188,7 @@ func (c *context) finalizeParse(rootID int) error { return ErrInvalidInput } - to, match, found := c.results.getMatch(0, rootID) + to, match, found := c.results.longestResult(0, rootID) if !found || !match || to < c.readOffset { return ErrUnexpectedCharacter } diff --git a/node.go b/node.go index 1c49cf6..465f0b7 100644 --- a/node.go +++ b/node.go @@ -83,8 +83,14 @@ func (n *Node) commit(t []rune) { } func (n *Node) String() string { - if n.From >= len(n.tokens) || n.To > len(n.tokens) { - return n.Name + ":invalid" + if n.From >= len(n.tokens) && n.To != n.From || n.To > len(n.tokens) { + return fmt.Sprintf( + "%s:invalid:%d:%d:%d", + n.Name, + len(n.tokens), + n.From, + n.To, + ) } return fmt.Sprintf("%s:%d:%d:%s", n.Name, n.From, n.To, n.Text()) diff --git a/parse_test.go b/parse_test.go index 43edb4c..41f8e81 100644 --- a/parse_test.go +++ b/parse_test.go @@ -133,6 +133,34 @@ func TestRecursion(t *testing.T) { }, }}, ) + + runTests( + t, + `A = "a" | A*`, + []testItem{{ + title: "recursive sequence in choice", + text: "aaaa", + ignorePosition: true, + node: &Node{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + }, { + Name: "A", + Nodes: []*Node{{ + Name: "A", + }, { + Name: "A", + Nodes: []*Node{{ + Name: "A", + }, { + Name: "A", + }}, + }}, + }}, + }, + }}, + ) } func TestSequence(t *testing.T) { @@ -188,33 +216,24 @@ func TestSequence(t *testing.T) { }, }}, ) -} -func TestSequenceBug(t *testing.T) { runTests( t, - `A = "a" | A*`, + `a = "a"?; A = a | a*`, []testItem{{ - title: "BUG: recursive sequence in choice", - text: "aaa", + title: "single or zero-or-more optional in choice", + text: "aaa", + ignorePosition: true, node: &Node{ Name: "A", Nodes: []*Node{{ - Name: "A", + Name: "a", }, { - Name: "A", - Nodes: []*Node{{ - Name: "A", - }, { - Name: "A", - }, { - Name: "A", - }}, + Name: "a", }, { - Name: "A", + Name: "a", }}, }, - ignorePosition: true, }}, ) } @@ -571,12 +590,14 @@ func TestUndefined(t *testing.T) { n, err := s.Parse(bytes.NewBufferString("a = b")) if err != nil { t.Error(err) + return } stest := &Syntax{} err = define(stest, n) if err != nil { t.Error(err) + return } if err := stest.Init(); err == nil { @@ -628,6 +649,22 @@ func TestEmpty(t *testing.T) { }, }}, ) + + runTests( + t, + `a = [a]*; a':alias = a; a'' = a' [^a]*`, + []testItem{{ + title: "no a", + text: "b", + ignorePosition: true, + node: &Node{ + Name: "a''", + Nodes: []*Node{{ + Name: "a", + }}, + }, + }}, + ) } func TestCharAsRoot(t *testing.T) { diff --git a/results.go b/results.go index 85c8354..c53e3ce 100644 --- a/results.go +++ b/results.go @@ -5,120 +5,39 @@ type results struct { match [][]int } -func (s *results) getMatch(offset, id int) (int, bool, bool) { - if len(s.noMatch) > offset && s.noMatch[offset] != nil && s.noMatch[offset].has(id) { - return 0, false, true - } - - if len(s.match) <= offset { - return 0, false, false - } - - var ( - found bool - to int - ) - - for i := 0; i < len(s.match[offset]); i += 2 { - if s.match[offset][i] != id { - continue - } - - found = true - if s.match[offset][i+1] > to { - to = s.match[offset][i+1] - } - } - - return to, found, found -} - -func (s *results) hasMatchTo(offset, id, to int) bool { - if len(s.noMatch) > offset && s.noMatch[offset] != nil && s.noMatch[offset].has(id) { - return false - } - - if len(s.match) <= offset { - return false - } - - for i := 0; i < len(s.match[offset]); i += 2 { - if s.match[offset][i] != id { - continue - } - - if s.match[offset][i+1] == to { - return true - } - } - - return false -} - -func (s *results) takeMatch(offset, id int) (int, bool) { - if len(s.match) <= offset { - return 0, false - } - - var ( - found bool - to int - index int - ) - - for i := 0; i < len(s.match[offset]); i += 2 { - if s.match[offset][i] != id { - continue - } - - if s.match[offset][i+1] > to || !found { - to = s.match[offset][i+1] - index = i - } - - found = true - } - - if found && to-offset > 0 { - s.match[offset][index] = -1 - } - - return to, found -} - -func (s *results) ensureOffset(offset int) { - if len(s.match) > offset { +func (r *results) ensureOffset(offset int) { + if len(r.match) > offset { return } - if cap(s.match) > offset { - s.match = s.match[:offset+1] + if cap(r.match) > offset { + r.match = r.match[:offset+1] return } - s.match = s.match[:cap(s.match)] - for i := len(s.match); i <= offset; i++ { - s.match = append(s.match, nil) + r.match = r.match[:cap(r.match)] + for i := len(r.match); i <= offset; i++ { + r.match = append(r.match, nil) } } -func (s *results) setMatch(offset, id, to int) { - s.ensureOffset(offset) - for i := 0; i < len(s.match[offset]); i += 2 { - if s.match[offset][i] != id || s.match[offset][i+1] != to { +func (r *results) setMatch(offset, id, to int) { + r.ensureOffset(offset) + for i := 0; i < len(r.match[offset]); i += 2 { + if r.match[offset][i] != id || r.match[offset][i+1] != to { continue } return } - s.match[offset] = append(s.match[offset], id, to) + r.match[offset] = append(r.match[offset], id, to) } -func (s *results) setNoMatch(offset, id int) { - if len(s.match) > offset { - for i := 0; i < len(s.match[offset]); i += 2 { - if s.match[offset][i] != id { +func (r *results) setNoMatch(offset, id int) { + if len(r.match) > offset { + for i := 0; i < len(r.match[offset]); i += 2 { + if r.match[offset][i] != id { continue } @@ -126,20 +45,82 @@ func (s *results) setNoMatch(offset, id int) { } } - if len(s.noMatch) <= offset { - if cap(s.noMatch) > offset { - s.noMatch = s.noMatch[:offset+1] + if len(r.noMatch) <= offset { + if cap(r.noMatch) > offset { + r.noMatch = r.noMatch[:offset+1] } else { - s.noMatch = s.noMatch[:cap(s.noMatch)] - for i := cap(s.noMatch); i <= offset; i++ { - s.noMatch = append(s.noMatch, nil) + r.noMatch = r.noMatch[:cap(r.noMatch)] + for i := cap(r.noMatch); i <= offset; i++ { + r.noMatch = append(r.noMatch, nil) } } } - if s.noMatch[offset] == nil { - s.noMatch[offset] = &idSet{} + if r.noMatch[offset] == nil { + r.noMatch[offset] = &idSet{} } - s.noMatch[offset].set(id) + r.noMatch[offset].set(id) +} + +func (r *results) hasMatchTo(offset, id, to int) bool { + if len(r.match) <= offset { + return false + } + + for i := 0; i < len(r.match[offset]); i += 2 { + if r.match[offset][i] != id { + continue + } + + if r.match[offset][i+1] == to { + return true + } + } + + return false +} + +func (r *results) longestMatch(offset, id int) (int, bool) { + if len(r.match) <= offset { + return 0, false + } + + var found bool + to := -1 + for i := 0; i < len(r.match[offset]); i += 2 { + if r.match[offset][i] != id { + continue + } + + if r.match[offset][i+1] > to { + to = r.match[offset][i+1] + } + + found = true + } + + return to, found +} + +func (r *results) longestResult(offset, id int) (int, bool, bool) { + if len(r.noMatch) > offset && r.noMatch[offset] != nil && r.noMatch[offset].has(id) { + return 0, false, true + } + + to, ok := r.longestMatch(offset, id) + return to, ok, ok +} + +func (r *results) dropMatchTo(offset, id, to int) { + for i := 0; i < len(r.match[offset]); i += 2 { + if r.match[offset][i] != id { + continue + } + + if r.match[offset][i+1] == to { + r.match[offset][i] = -1 + return + } + } } diff --git a/run_test.go b/run_test.go index 6bfafce..0e1d798 100644 --- a/run_test.go +++ b/run_test.go @@ -15,9 +15,18 @@ type testItem struct { ignorePosition bool } -func runTestsSyntax(t *testing.T, s *Syntax, tests []testItem) { +func runTestsGetSyntax(t *testing.T, getSyntax func(t *testing.T) *Syntax, tests []testItem) { + var s *Syntax for _, test := range tests { t.Run(test.title, func(t *testing.T) { + if s == nil { + s = getSyntax(t) + } + + if t.Failed() { + return + } + b := bytes.NewBufferString(test.text) start := time.Now() @@ -43,22 +52,32 @@ func runTestsSyntax(t *testing.T, s *Syntax, tests []testItem) { } } +func runTestsSyntax(t *testing.T, s *Syntax, tests []testItem) { + runTestsGetSyntax(t, func(*testing.T) *Syntax { return s }, tests) +} + func runTests(t *testing.T, syntax string, tests []testItem) { - s, err := openSyntaxString(syntax) - if err != nil { - t.Error(err) - return + getSyntax := func(t *testing.T) *Syntax { + s, err := openSyntaxString(syntax) + if err != nil { + t.Error(err) + } + + return s } - runTestsSyntax(t, s, tests) + runTestsGetSyntax(t, getSyntax, tests) } func runTestsFile(t *testing.T, file string, tests []testItem) { - s, err := openSyntaxFile(file) - if err != nil { - t.Error(err) - return + getSyntax := func(t *testing.T) *Syntax { + s, err := openSyntaxFile(file) + if err != nil { + t.Error(err) + } + + return s } - runTestsSyntax(t, s, tests) + runTestsGetSyntax(t, getSyntax, tests) } diff --git a/sequence.go b/sequence.go index a3ed90a..41bf1df 100644 --- a/sequence.go +++ b/sequence.go @@ -184,6 +184,8 @@ func (p *sequenceParser) parse(c *context) { // TODO: // - is it ok to parse before max range check? what if max=0 // - validate, normalize and document max=0 + + // TODO: test this f(g()) p.items[itemIndex].parse(c) if !c.matchLast { if currentCount < p.ranges[itemIndex][0] { @@ -232,18 +234,27 @@ func (b *sequenceBuilder) nodeName() string { return b.name } func (b *sequenceBuilder) nodeID() int { return b.id } func (b *sequenceBuilder) build(c *context) ([]*Node, bool) { - to, ok := c.results.takeMatch(c.offset, b.id) + to, ok := c.results.longestMatch(c.offset, b.id) if !ok { return nil, false } - if to-c.offset == 0 && b.commit&Alias != 0 { - return nil, true + if c.buildPending(c.offset, b.id, to) { + return nil, false } + c.markBuildPending(c.offset, b.id, to) + + if to-c.offset > 0 { + c.results.dropMatchTo(c.offset, b.id, to) + } + + from := c.offset + if b.allChars { - from := c.offset c.offset = to + c.unmarkBuildPending(from, b.id, to) + if b.commit&Alias != 0 { return nil, true } @@ -256,7 +267,6 @@ func (b *sequenceBuilder) build(c *context) ([]*Node, bool) { }}, true } - from := c.offset var ( itemIndex int currentCount int @@ -277,17 +287,32 @@ func (b *sequenceBuilder) build(c *context) ([]*Node, bool) { } parsed := c.offset > itemFrom - if parsed || len(n) > 0 { + + if parsed { nodes = append(nodes, n...) currentCount++ } - if !parsed || b.ranges[itemIndex][1] >= 0 && currentCount == b.ranges[itemIndex][1] { + if !parsed { + if currentCount < b.ranges[itemIndex][0] { + for i := 0; i < b.ranges[itemIndex][0]-currentCount; i++ { + nodes = append(nodes, n...) + } + } + + itemIndex++ + currentCount = 0 + continue + } + + if b.ranges[itemIndex][1] >= 0 && currentCount == b.ranges[itemIndex][1] { itemIndex++ currentCount = 0 } } + c.unmarkBuildPending(from, b.id, to) + if b.commit&Alias != 0 { return nodes, true } diff --git a/syntax.go b/syntax.go index 055bc6e..3545de1 100644 --- a/syntax.go +++ b/syntax.go @@ -318,6 +318,8 @@ func (s *Syntax) Parse(r io.Reader) (*Node, error) { } c.offset = 0 + c.resetPending() + n, ok := s.builder.build(c) if !ok || len(n) != 1 { panic("damaged parse result") diff --git a/whitespace.go b/whitespace.go index efd8c82..a85b7da 100644 --- a/whitespace.go +++ b/whitespace.go @@ -66,6 +66,7 @@ func applyWhitespaceToSeq(s *sequenceDefinition) []definition { whitespace := SequenceItem{Name: whitespaceName, Min: 0, Max: -1} for i, item := range s.items { + // TODO: there should not be max=0 if item.Max >= 0 && item.Max <= 1 { if i > 0 { items = append(items, whitespace) @@ -98,8 +99,13 @@ func applyWhitespaceToSeq(s *sequenceDefinition) []definition { continue } + optItems := []SequenceItem{singleItem, restItems} + if i > 0 { + optItems = []SequenceItem{whitespace, singleItem, restItems} + } + optName := patchName(item.Name, s.nodeName(), "wsopt", strconv.Itoa(i)) - optDef := newSequence(optName, Alias, []SequenceItem{whitespace, singleItem, restItems}) + optDef := newSequence(optName, Alias, optItems) defs = append(defs, optDef) items = append(items, SequenceItem{Name: optName, Min: 0, Max: 1}) } diff --git a/whitespace_test.go b/whitespace_test.go index 507f83f..bc8211d 100644 --- a/whitespace_test.go +++ b/whitespace_test.go @@ -8,9 +8,9 @@ const ( word-char:alias = [^\n, \t]; cell = (word-char (ws* word-char)*)?; rest-cell:alias = "," ws* cell; - line = cell ws* (rest-cell (ws* rest-cell)*)?; + line = cell (ws* rest-cell (ws* rest-cell)*)?; rest-line:alias = "\n" ws* line; - document = ws* (line ws* (rest-line (ws* rest-line)*)?)? ws*; + document = ws* (line (ws* rest-line (ws* rest-line)*)?)? ws*; ` csvWithWhitespaceSupport = ` @@ -23,7 +23,8 @@ const ( func TestCSVWhitespace(t *testing.T) { tests := []testItem{{ - title: "empty", + title: "empty", + ignorePosition: true, node: &Node{ Name: "document", },