fix bug with building empty items of sequences

This commit is contained in:
Arpad Ryszka 2017-11-04 22:08:15 +01:00
parent 930fb9f449
commit 60172b2f9f
11 changed files with 311 additions and 171 deletions

View File

@ -43,27 +43,28 @@ func checkNode(t *testing.T, ignorePosition bool, left, right *Node) {
return return
} }
if len(left.Nodes) != len(right.Nodes) { lnodes, rnodes := left.Nodes, right.Nodes
t.Error("length doesn't match", left.Name, len(left.Nodes), len(right.Nodes)) if len(lnodes) != len(rnodes) {
t.Error("length doesn't match", left.Name, len(lnodes), len(rnodes))
t.Log(left) t.Log(left)
t.Log(right) t.Log(right)
for { for {
if len(left.Nodes) > 0 { if len(lnodes) > 0 {
t.Log("<", left.Nodes[0]) t.Log("<", lnodes[0])
left.Nodes = left.Nodes[1:] lnodes = lnodes[1:]
} }
if len(right.Nodes) > 0 { if len(rnodes) > 0 {
t.Log(">", right.Nodes[0]) t.Log(">", rnodes[0])
right.Nodes = right.Nodes[1:] rnodes = rnodes[1:]
} }
if len(left.Nodes) == 0 && len(right.Nodes) == 0 { if len(lnodes) == 0 && len(rnodes) == 0 {
break break
} }
} }
return return
} }
checkNodes(t, ignorePosition, left.Nodes, right.Nodes) checkNodes(t, ignorePosition, lnodes, rnodes)
} }

View File

@ -50,12 +50,12 @@ func (d *choiceDefinition) validate(r *registry) error {
d.validated = true d.validated = true
for i := range d.options { for i := range d.options {
e, ok := r.definitions[d.options[i]] o, ok := r.definitions[d.options[i]]
if !ok { if !ok {
return parserNotFound(d.options[i]) return parserNotFound(d.options[i])
} }
if err := e.validate(r); err != nil { if err := o.validate(r); err != nil {
return err return err
} }
} }
@ -76,8 +76,8 @@ func (d *choiceDefinition) createBuilder() {
} }
func (d *choiceDefinition) initOptions(r *registry) { func (d *choiceDefinition) initOptions(r *registry) {
for _, e := range d.options { for _, o := range d.options {
def := r.definitions[e] def := r.definitions[o]
d.optionDefs = append(d.optionDefs, def) d.optionDefs = append(d.optionDefs, def)
def.init(r) def.init(r)
d.cbuilder.options = append(d.cbuilder.options, def.builder()) d.cbuilder.options = append(d.cbuilder.options, def.builder())
@ -101,8 +101,8 @@ func (d *choiceDefinition) addGeneralization(g int) {
} }
d.generalizations = append(d.generalizations, g) d.generalizations = append(d.generalizations, g)
for _, e := range d.optionDefs { for _, o := range d.optionDefs {
e.addGeneralization(g) o.addGeneralization(g)
} }
} }
@ -201,15 +201,25 @@ func (b *choiceBuilder) nodeName() string { return b.name }
func (b *choiceBuilder) nodeID() int { return b.id } func (b *choiceBuilder) nodeID() int { return b.id }
func (b *choiceBuilder) build(c *context) ([]*Node, bool) { func (b *choiceBuilder) build(c *context) ([]*Node, bool) {
to, ok := c.results.takeMatch(c.offset, b.id) to, ok := c.results.longestMatch(c.offset, b.id)
if !ok { if !ok {
return nil, false return nil, false
} }
if c.buildPending(c.offset, b.id, to) {
return nil, false
}
c.markBuildPending(c.offset, b.id, to)
if to-c.offset > 0 {
c.results.dropMatchTo(c.offset, b.id, to)
}
var option builder var option builder
for _, e := range b.options { for _, o := range b.options {
if c.results.hasMatchTo(c.offset, e.nodeID(), to) { if c.results.hasMatchTo(c.offset, o.nodeID(), to) {
option = e option = o
break break
} }
} }
@ -225,6 +235,8 @@ func (b *choiceBuilder) build(c *context) ([]*Node, bool) {
panic("damaged parse result") panic("damaged parse result")
} }
c.unmarkBuildPending(from, b.id, to)
if b.commit&Alias != 0 { if b.commit&Alias != 0 {
return n, true return n, true
} }

View File

@ -63,7 +63,7 @@ func (c *context) token() (rune, bool) {
return c.tokens[c.offset], true return c.tokens[c.offset], true
} }
func (c *context) pending(offset int, id int) bool { func (c *context) pending(offset, id int) bool {
if len(c.isPending) <= id { if len(c.isPending) <= id {
return false return false
} }
@ -77,7 +77,7 @@ func (c *context) pending(offset int, id int) bool {
return false return false
} }
func (c *context) markPending(offset int, id int) { func (c *context) markPending(offset, id int) {
if len(c.isPending) <= id { if len(c.isPending) <= id {
if cap(c.isPending) > id { if cap(c.isPending) > id {
c.isPending = c.isPending[:id+1] c.isPending = c.isPending[:id+1]
@ -99,7 +99,7 @@ func (c *context) markPending(offset int, id int) {
c.isPending[id] = append(c.isPending[id], offset) c.isPending[id] = append(c.isPending[id], offset)
} }
func (c *context) unmarkPending(offset int, id int) { func (c *context) unmarkPending(offset, id int) {
for i := range c.isPending[id] { for i := range c.isPending[id] {
if c.isPending[id][i] == offset { if c.isPending[id][i] == offset {
c.isPending[id][i] = -1 c.isPending[id][i] = -1
@ -108,8 +108,58 @@ func (c *context) unmarkPending(offset int, id int) {
} }
} }
// resetPending discards every recorded pending marker by setting
// c.isPending to nil. Syntax.Parse calls it right before invoking the root
// builder.
func (c *context) resetPending() {
c.isPending = nil
}
// buildPending reports whether the (offset, to) range is currently recorded
// for the builder with the given id.
//
// The entries of c.isPending[id] are read as a flat list of pairs: the
// element at an even index is the start offset and the one following it is
// the end offset of the range.
func (c *context) buildPending(offset, id, to int) bool {
	// Nothing has ever been recorded for ids beyond the end of the table.
	if id >= len(c.isPending) {
		return false
	}

	pairs := c.isPending[id]
	for at := 0; at < len(pairs); at += 2 {
		if pairs[at] != offset {
			continue
		}

		if pairs[at+1] == to {
			return true
		}
	}

	return false
}
// markBuildPending records the (offset, to) range for the builder with the
// given id.
//
// c.isPending is grown as needed so that it can be indexed by id. Within
// c.isPending[id] the ranges are kept as a flat list of pairs; a pair whose
// first element is -1 counts as free and is overwritten before the list is
// extended.
func (c *context) markBuildPending(offset, id, to int) {
	if need := id + 1; len(c.isPending) < need {
		if cap(c.isPending) >= need {
			// Enough spare capacity: extending the length is sufficient.
			c.isPending = c.isPending[:need]
		} else {
			// Use up the existing capacity first, then append one empty
			// entry at a time until id becomes a valid index.
			c.isPending = c.isPending[:cap(c.isPending)]
			for len(c.isPending) < need {
				c.isPending = append(c.isPending, nil)
			}
		}
	}

	pairs := c.isPending[id]
	for at := 0; at < len(pairs); at += 2 {
		if pairs[at] != -1 {
			continue
		}

		// Reuse the freed slot.
		pairs[at] = offset
		pairs[at+1] = to
		return
	}

	c.isPending[id] = append(pairs, offset, to)
}
// unmarkBuildPending removes the (offset, to) range recorded for the builder
// with the given id.
//
// The entries of c.isPending[id] are read as a flat list of pairs. The first
// matching pair is released by setting its start offset to -1; the end
// offset is left in place. Only the first match is released.
//
// Calling it for an id that has no entry in c.isPending, or for a range that
// is not recorded, is a no-op.
func (c *context) unmarkBuildPending(offset, id, to int) {
	// Same guard as the lookup side: ids outside of the table have nothing
	// recorded, and indexing them would panic.
	if id < 0 || id >= len(c.isPending) {
		return
	}

	pairs := c.isPending[id]

	// i+1 must be a valid index, so a dangling last element is never read.
	for i := 0; i+1 < len(pairs); i += 2 {
		if pairs[i] == offset && pairs[i+1] == to {
			pairs[i] = -1
			return
		}
	}
}
func (c *context) fromResults(id int) bool { func (c *context) fromResults(id int) bool {
to, m, ok := c.results.getMatch(c.offset, id) to, m, ok := c.results.longestResult(c.offset, id)
if !ok { if !ok {
return false return false
} }
@ -138,7 +188,7 @@ func (c *context) finalizeParse(rootID int) error {
return ErrInvalidInput return ErrInvalidInput
} }
to, match, found := c.results.getMatch(0, rootID) to, match, found := c.results.longestResult(0, rootID)
if !found || !match || to < c.readOffset { if !found || !match || to < c.readOffset {
return ErrUnexpectedCharacter return ErrUnexpectedCharacter
} }

10
node.go
View File

@ -83,8 +83,14 @@ func (n *Node) commit(t []rune) {
} }
func (n *Node) String() string { func (n *Node) String() string {
if n.From >= len(n.tokens) || n.To > len(n.tokens) { if n.From >= len(n.tokens) && n.To != n.From || n.To > len(n.tokens) {
return n.Name + ":invalid" return fmt.Sprintf(
"%s:invalid:%d:%d:%d",
n.Name,
len(n.tokens),
n.From,
n.To,
)
} }
return fmt.Sprintf("%s:%d:%d:%s", n.Name, n.From, n.To, n.Text()) return fmt.Sprintf("%s:%d:%d:%s", n.Name, n.From, n.To, n.Text())

View File

@ -133,6 +133,34 @@ func TestRecursion(t *testing.T) {
}, },
}}, }},
) )
runTests(
t,
`A = "a" | A*`,
[]testItem{{
title: "recursive sequence in choice",
text: "aaaa",
ignorePosition: true,
node: &Node{
Name: "A",
Nodes: []*Node{{
Name: "A",
}, {
Name: "A",
Nodes: []*Node{{
Name: "A",
}, {
Name: "A",
Nodes: []*Node{{
Name: "A",
}, {
Name: "A",
}},
}},
}},
},
}},
)
} }
func TestSequence(t *testing.T) { func TestSequence(t *testing.T) {
@ -188,33 +216,24 @@ func TestSequence(t *testing.T) {
}, },
}}, }},
) )
}
func TestSequenceBug(t *testing.T) {
runTests( runTests(
t, t,
`A = "a" | A*`, `a = "a"?; A = a | a*`,
[]testItem{{ []testItem{{
title: "BUG: recursive sequence in choice", title: "single or zero-or-more optional in choice",
text: "aaa", text: "aaa",
ignorePosition: true,
node: &Node{ node: &Node{
Name: "A", Name: "A",
Nodes: []*Node{{ Nodes: []*Node{{
Name: "A", Name: "a",
}, { }, {
Name: "A", Name: "a",
Nodes: []*Node{{
Name: "A",
}, { }, {
Name: "A", Name: "a",
}, {
Name: "A",
}},
}, {
Name: "A",
}}, }},
}, },
ignorePosition: true,
}}, }},
) )
} }
@ -571,12 +590,14 @@ func TestUndefined(t *testing.T) {
n, err := s.Parse(bytes.NewBufferString("a = b")) n, err := s.Parse(bytes.NewBufferString("a = b"))
if err != nil { if err != nil {
t.Error(err) t.Error(err)
return
} }
stest := &Syntax{} stest := &Syntax{}
err = define(stest, n) err = define(stest, n)
if err != nil { if err != nil {
t.Error(err) t.Error(err)
return
} }
if err := stest.Init(); err == nil { if err := stest.Init(); err == nil {
@ -628,6 +649,22 @@ func TestEmpty(t *testing.T) {
}, },
}}, }},
) )
runTests(
t,
`a = [a]*; a':alias = a; a'' = a' [^a]*`,
[]testItem{{
title: "no a",
text: "b",
ignorePosition: true,
node: &Node{
Name: "a''",
Nodes: []*Node{{
Name: "a",
}},
},
}},
)
} }
func TestCharAsRoot(t *testing.T) { func TestCharAsRoot(t *testing.T) {

View File

@ -5,49 +5,75 @@ type results struct {
match [][]int match [][]int
} }
func (s *results) getMatch(offset, id int) (int, bool, bool) { func (r *results) ensureOffset(offset int) {
if len(s.noMatch) > offset && s.noMatch[offset] != nil && s.noMatch[offset].has(id) { if len(r.match) > offset {
return 0, false, true return
} }
if len(s.match) <= offset { if cap(r.match) > offset {
return 0, false, false r.match = r.match[:offset+1]
return
} }
var ( r.match = r.match[:cap(r.match)]
found bool for i := len(r.match); i <= offset; i++ {
to int r.match = append(r.match, nil)
) }
}
for i := 0; i < len(s.match[offset]); i += 2 { func (r *results) setMatch(offset, id, to int) {
if s.match[offset][i] != id { r.ensureOffset(offset)
for i := 0; i < len(r.match[offset]); i += 2 {
if r.match[offset][i] != id || r.match[offset][i+1] != to {
continue continue
} }
found = true return
if s.match[offset][i+1] > to {
to = s.match[offset][i+1]
}
} }
return to, found, found r.match[offset] = append(r.match[offset], id, to)
} }
func (s *results) hasMatchTo(offset, id, to int) bool { func (r *results) setNoMatch(offset, id int) {
if len(s.noMatch) > offset && s.noMatch[offset] != nil && s.noMatch[offset].has(id) { if len(r.match) > offset {
return false for i := 0; i < len(r.match[offset]); i += 2 {
} if r.match[offset][i] != id {
if len(s.match) <= offset {
return false
}
for i := 0; i < len(s.match[offset]); i += 2 {
if s.match[offset][i] != id {
continue continue
} }
if s.match[offset][i+1] == to { return
}
}
if len(r.noMatch) <= offset {
if cap(r.noMatch) > offset {
r.noMatch = r.noMatch[:offset+1]
} else {
r.noMatch = r.noMatch[:cap(r.noMatch)]
for i := cap(r.noMatch); i <= offset; i++ {
r.noMatch = append(r.noMatch, nil)
}
}
}
if r.noMatch[offset] == nil {
r.noMatch[offset] = &idSet{}
}
r.noMatch[offset].set(id)
}
func (r *results) hasMatchTo(offset, id, to int) bool {
if len(r.match) <= offset {
return false
}
for i := 0; i < len(r.match[offset]); i += 2 {
if r.match[offset][i] != id {
continue
}
if r.match[offset][i+1] == to {
return true return true
} }
} }
@ -55,91 +81,46 @@ func (s *results) hasMatchTo(offset, id, to int) bool {
return false return false
} }
func (s *results) takeMatch(offset, id int) (int, bool) { func (r *results) longestMatch(offset, id int) (int, bool) {
if len(s.match) <= offset { if len(r.match) <= offset {
return 0, false return 0, false
} }
var ( var found bool
found bool to := -1
to int for i := 0; i < len(r.match[offset]); i += 2 {
index int if r.match[offset][i] != id {
)
for i := 0; i < len(s.match[offset]); i += 2 {
if s.match[offset][i] != id {
continue continue
} }
if s.match[offset][i+1] > to || !found { if r.match[offset][i+1] > to {
to = s.match[offset][i+1] to = r.match[offset][i+1]
index = i
} }
found = true found = true
} }
if found && to-offset > 0 {
s.match[offset][index] = -1
}
return to, found return to, found
} }
func (s *results) ensureOffset(offset int) { func (r *results) longestResult(offset, id int) (int, bool, bool) {
if len(s.match) > offset { if len(r.noMatch) > offset && r.noMatch[offset] != nil && r.noMatch[offset].has(id) {
return return 0, false, true
} }
if cap(s.match) > offset { to, ok := r.longestMatch(offset, id)
s.match = s.match[:offset+1] return to, ok, ok
return
} }
s.match = s.match[:cap(s.match)] func (r *results) dropMatchTo(offset, id, to int) {
for i := len(s.match); i <= offset; i++ { for i := 0; i < len(r.match[offset]); i += 2 {
s.match = append(s.match, nil) if r.match[offset][i] != id {
}
}
func (s *results) setMatch(offset, id, to int) {
s.ensureOffset(offset)
for i := 0; i < len(s.match[offset]); i += 2 {
if s.match[offset][i] != id || s.match[offset][i+1] != to {
continue
}
return
}
s.match[offset] = append(s.match[offset], id, to)
}
func (s *results) setNoMatch(offset, id int) {
if len(s.match) > offset {
for i := 0; i < len(s.match[offset]); i += 2 {
if s.match[offset][i] != id {
continue continue
} }
if r.match[offset][i+1] == to {
r.match[offset][i] = -1
return return
} }
} }
if len(s.noMatch) <= offset {
if cap(s.noMatch) > offset {
s.noMatch = s.noMatch[:offset+1]
} else {
s.noMatch = s.noMatch[:cap(s.noMatch)]
for i := cap(s.noMatch); i <= offset; i++ {
s.noMatch = append(s.noMatch, nil)
}
}
}
if s.noMatch[offset] == nil {
s.noMatch[offset] = &idSet{}
}
s.noMatch[offset].set(id)
} }

View File

@ -15,9 +15,18 @@ type testItem struct {
ignorePosition bool ignorePosition bool
} }
func runTestsSyntax(t *testing.T, s *Syntax, tests []testItem) { func runTestsGetSyntax(t *testing.T, getSyntax func(t *testing.T) *Syntax, tests []testItem) {
var s *Syntax
for _, test := range tests { for _, test := range tests {
t.Run(test.title, func(t *testing.T) { t.Run(test.title, func(t *testing.T) {
if s == nil {
s = getSyntax(t)
}
if t.Failed() {
return
}
b := bytes.NewBufferString(test.text) b := bytes.NewBufferString(test.text)
start := time.Now() start := time.Now()
@ -43,22 +52,32 @@ func runTestsSyntax(t *testing.T, s *Syntax, tests []testItem) {
} }
} }
// runTestsSyntax runs the test items against an already available syntax.
// It delegates to runTestsGetSyntax with a getter that always returns s.
func runTestsSyntax(t *testing.T, s *Syntax, tests []testItem) {
	fixed := func(*testing.T) *Syntax {
		return s
	}

	runTestsGetSyntax(t, fixed, tests)
}
func runTests(t *testing.T, syntax string, tests []testItem) { func runTests(t *testing.T, syntax string, tests []testItem) {
getSyntax := func(t *testing.T) *Syntax {
s, err := openSyntaxString(syntax) s, err := openSyntaxString(syntax)
if err != nil { if err != nil {
t.Error(err) t.Error(err)
return
} }
runTestsSyntax(t, s, tests) return s
}
runTestsGetSyntax(t, getSyntax, tests)
} }
func runTestsFile(t *testing.T, file string, tests []testItem) { func runTestsFile(t *testing.T, file string, tests []testItem) {
getSyntax := func(t *testing.T) *Syntax {
s, err := openSyntaxFile(file) s, err := openSyntaxFile(file)
if err != nil { if err != nil {
t.Error(err) t.Error(err)
return
} }
runTestsSyntax(t, s, tests) return s
}
runTestsGetSyntax(t, getSyntax, tests)
} }

View File

@ -184,6 +184,8 @@ func (p *sequenceParser) parse(c *context) {
// TODO: // TODO:
// - is it ok to parse before max range check? what if max=0 // - is it ok to parse before max range check? what if max=0
// - validate, normalize and document max=0 // - validate, normalize and document max=0
// TODO: test this f(g())
p.items[itemIndex].parse(c) p.items[itemIndex].parse(c)
if !c.matchLast { if !c.matchLast {
if currentCount < p.ranges[itemIndex][0] { if currentCount < p.ranges[itemIndex][0] {
@ -232,18 +234,27 @@ func (b *sequenceBuilder) nodeName() string { return b.name }
func (b *sequenceBuilder) nodeID() int { return b.id } func (b *sequenceBuilder) nodeID() int { return b.id }
func (b *sequenceBuilder) build(c *context) ([]*Node, bool) { func (b *sequenceBuilder) build(c *context) ([]*Node, bool) {
to, ok := c.results.takeMatch(c.offset, b.id) to, ok := c.results.longestMatch(c.offset, b.id)
if !ok { if !ok {
return nil, false return nil, false
} }
if to-c.offset == 0 && b.commit&Alias != 0 { if c.buildPending(c.offset, b.id, to) {
return nil, true return nil, false
}
c.markBuildPending(c.offset, b.id, to)
if to-c.offset > 0 {
c.results.dropMatchTo(c.offset, b.id, to)
} }
if b.allChars {
from := c.offset from := c.offset
if b.allChars {
c.offset = to c.offset = to
c.unmarkBuildPending(from, b.id, to)
if b.commit&Alias != 0 { if b.commit&Alias != 0 {
return nil, true return nil, true
} }
@ -256,7 +267,6 @@ func (b *sequenceBuilder) build(c *context) ([]*Node, bool) {
}}, true }}, true
} }
from := c.offset
var ( var (
itemIndex int itemIndex int
currentCount int currentCount int
@ -277,17 +287,32 @@ func (b *sequenceBuilder) build(c *context) ([]*Node, bool) {
} }
parsed := c.offset > itemFrom parsed := c.offset > itemFrom
if parsed || len(n) > 0 {
if parsed {
nodes = append(nodes, n...) nodes = append(nodes, n...)
currentCount++ currentCount++
} }
if !parsed || b.ranges[itemIndex][1] >= 0 && currentCount == b.ranges[itemIndex][1] { if !parsed {
if currentCount < b.ranges[itemIndex][0] {
for i := 0; i < b.ranges[itemIndex][0]-currentCount; i++ {
nodes = append(nodes, n...)
}
}
itemIndex++
currentCount = 0
continue
}
if b.ranges[itemIndex][1] >= 0 && currentCount == b.ranges[itemIndex][1] {
itemIndex++ itemIndex++
currentCount = 0 currentCount = 0
} }
} }
c.unmarkBuildPending(from, b.id, to)
if b.commit&Alias != 0 { if b.commit&Alias != 0 {
return nodes, true return nodes, true
} }

View File

@ -318,6 +318,8 @@ func (s *Syntax) Parse(r io.Reader) (*Node, error) {
} }
c.offset = 0 c.offset = 0
c.resetPending()
n, ok := s.builder.build(c) n, ok := s.builder.build(c)
if !ok || len(n) != 1 { if !ok || len(n) != 1 {
panic("damaged parse result") panic("damaged parse result")

View File

@ -66,6 +66,7 @@ func applyWhitespaceToSeq(s *sequenceDefinition) []definition {
whitespace := SequenceItem{Name: whitespaceName, Min: 0, Max: -1} whitespace := SequenceItem{Name: whitespaceName, Min: 0, Max: -1}
for i, item := range s.items { for i, item := range s.items {
// TODO: there should not be max=0
if item.Max >= 0 && item.Max <= 1 { if item.Max >= 0 && item.Max <= 1 {
if i > 0 { if i > 0 {
items = append(items, whitespace) items = append(items, whitespace)
@ -98,8 +99,13 @@ func applyWhitespaceToSeq(s *sequenceDefinition) []definition {
continue continue
} }
optItems := []SequenceItem{singleItem, restItems}
if i > 0 {
optItems = []SequenceItem{whitespace, singleItem, restItems}
}
optName := patchName(item.Name, s.nodeName(), "wsopt", strconv.Itoa(i)) optName := patchName(item.Name, s.nodeName(), "wsopt", strconv.Itoa(i))
optDef := newSequence(optName, Alias, []SequenceItem{whitespace, singleItem, restItems}) optDef := newSequence(optName, Alias, optItems)
defs = append(defs, optDef) defs = append(defs, optDef)
items = append(items, SequenceItem{Name: optName, Min: 0, Max: 1}) items = append(items, SequenceItem{Name: optName, Min: 0, Max: 1})
} }

View File

@ -8,9 +8,9 @@ const (
word-char:alias = [^\n, \t]; word-char:alias = [^\n, \t];
cell = (word-char (ws* word-char)*)?; cell = (word-char (ws* word-char)*)?;
rest-cell:alias = "," ws* cell; rest-cell:alias = "," ws* cell;
line = cell ws* (rest-cell (ws* rest-cell)*)?; line = cell (ws* rest-cell (ws* rest-cell)*)?;
rest-line:alias = "\n" ws* line; rest-line:alias = "\n" ws* line;
document = ws* (line ws* (rest-line (ws* rest-line)*)?)? ws*; document = ws* (line (ws* rest-line (ws* rest-line)*)?)? ws*;
` `
csvWithWhitespaceSupport = ` csvWithWhitespaceSupport = `
@ -24,6 +24,7 @@ const (
func TestCSVWhitespace(t *testing.T) { func TestCSVWhitespace(t *testing.T) {
tests := []testItem{{ tests := []testItem{{
title: "empty", title: "empty",
ignorePosition: true,
node: &Node{ node: &Node{
Name: "document", Name: "document",
}, },