fix bug with building empty items of sequences

This commit is contained in:
Arpad Ryszka 2017-11-04 22:08:15 +01:00
parent 930fb9f449
commit 60172b2f9f
11 changed files with 311 additions and 171 deletions

View File

@ -43,27 +43,28 @@ func checkNode(t *testing.T, ignorePosition bool, left, right *Node) {
return
}
if len(left.Nodes) != len(right.Nodes) {
t.Error("length doesn't match", left.Name, len(left.Nodes), len(right.Nodes))
lnodes, rnodes := left.Nodes, right.Nodes
if len(lnodes) != len(rnodes) {
t.Error("length doesn't match", left.Name, len(lnodes), len(rnodes))
t.Log(left)
t.Log(right)
for {
if len(left.Nodes) > 0 {
t.Log("<", left.Nodes[0])
left.Nodes = left.Nodes[1:]
if len(lnodes) > 0 {
t.Log("<", lnodes[0])
lnodes = lnodes[1:]
}
if len(right.Nodes) > 0 {
t.Log(">", right.Nodes[0])
right.Nodes = right.Nodes[1:]
if len(rnodes) > 0 {
t.Log(">", rnodes[0])
rnodes = rnodes[1:]
}
if len(left.Nodes) == 0 && len(right.Nodes) == 0 {
if len(lnodes) == 0 && len(rnodes) == 0 {
break
}
}
return
}
checkNodes(t, ignorePosition, left.Nodes, right.Nodes)
checkNodes(t, ignorePosition, lnodes, rnodes)
}

View File

@ -50,12 +50,12 @@ func (d *choiceDefinition) validate(r *registry) error {
d.validated = true
for i := range d.options {
e, ok := r.definitions[d.options[i]]
o, ok := r.definitions[d.options[i]]
if !ok {
return parserNotFound(d.options[i])
}
if err := e.validate(r); err != nil {
if err := o.validate(r); err != nil {
return err
}
}
@ -76,8 +76,8 @@ func (d *choiceDefinition) createBuilder() {
}
func (d *choiceDefinition) initOptions(r *registry) {
for _, e := range d.options {
def := r.definitions[e]
for _, o := range d.options {
def := r.definitions[o]
d.optionDefs = append(d.optionDefs, def)
def.init(r)
d.cbuilder.options = append(d.cbuilder.options, def.builder())
@ -101,8 +101,8 @@ func (d *choiceDefinition) addGeneralization(g int) {
}
d.generalizations = append(d.generalizations, g)
for _, e := range d.optionDefs {
e.addGeneralization(g)
for _, o := range d.optionDefs {
o.addGeneralization(g)
}
}
@ -201,15 +201,25 @@ func (b *choiceBuilder) nodeName() string { return b.name }
func (b *choiceBuilder) nodeID() int { return b.id }
func (b *choiceBuilder) build(c *context) ([]*Node, bool) {
to, ok := c.results.takeMatch(c.offset, b.id)
to, ok := c.results.longestMatch(c.offset, b.id)
if !ok {
return nil, false
}
if c.buildPending(c.offset, b.id, to) {
return nil, false
}
c.markBuildPending(c.offset, b.id, to)
if to-c.offset > 0 {
c.results.dropMatchTo(c.offset, b.id, to)
}
var option builder
for _, e := range b.options {
if c.results.hasMatchTo(c.offset, e.nodeID(), to) {
option = e
for _, o := range b.options {
if c.results.hasMatchTo(c.offset, o.nodeID(), to) {
option = o
break
}
}
@ -225,6 +235,8 @@ func (b *choiceBuilder) build(c *context) ([]*Node, bool) {
panic("damaged parse result")
}
c.unmarkBuildPending(from, b.id, to)
if b.commit&Alias != 0 {
return n, true
}

View File

@ -63,7 +63,7 @@ func (c *context) token() (rune, bool) {
return c.tokens[c.offset], true
}
func (c *context) pending(offset int, id int) bool {
func (c *context) pending(offset, id int) bool {
if len(c.isPending) <= id {
return false
}
@ -77,7 +77,7 @@ func (c *context) pending(offset int, id int) bool {
return false
}
func (c *context) markPending(offset int, id int) {
func (c *context) markPending(offset, id int) {
if len(c.isPending) <= id {
if cap(c.isPending) > id {
c.isPending = c.isPending[:id+1]
@ -99,7 +99,7 @@ func (c *context) markPending(offset int, id int) {
c.isPending[id] = append(c.isPending[id], offset)
}
func (c *context) unmarkPending(offset int, id int) {
func (c *context) unmarkPending(offset, id int) {
for i := range c.isPending[id] {
if c.isPending[id][i] == offset {
c.isPending[id][i] = -1
@ -108,8 +108,58 @@ func (c *context) unmarkPending(offset int, id int) {
}
}
func (c *context) resetPending() {
c.isPending = nil
}
func (c *context) buildPending(offset, id, to int) bool {
if len(c.isPending) <= id {
return false
}
for i := 0; i < len(c.isPending[id]); i += 2 {
if c.isPending[id][i] == offset && c.isPending[id][i+1] == to {
return true
}
}
return false
}
func (c *context) markBuildPending(offset, id, to int) {
if len(c.isPending) <= id {
if cap(c.isPending) > id {
c.isPending = c.isPending[:id+1]
} else {
c.isPending = c.isPending[:cap(c.isPending)]
for i := cap(c.isPending); i <= id; i++ {
c.isPending = append(c.isPending, nil)
}
}
}
for i := 0; i < len(c.isPending[id]); i += 2 {
if c.isPending[id][i] == -1 {
c.isPending[id][i] = offset
c.isPending[id][i+1] = to
return
}
}
c.isPending[id] = append(c.isPending[id], offset, to)
}
func (c *context) unmarkBuildPending(offset, id, to int) {
for i := 0; i < len(c.isPending[id]); i += 2 {
if c.isPending[id][i] == offset && c.isPending[id][i+1] == to {
c.isPending[id][i] = -1
break
}
}
}
func (c *context) fromResults(id int) bool {
to, m, ok := c.results.getMatch(c.offset, id)
to, m, ok := c.results.longestResult(c.offset, id)
if !ok {
return false
}
@ -138,7 +188,7 @@ func (c *context) finalizeParse(rootID int) error {
return ErrInvalidInput
}
to, match, found := c.results.getMatch(0, rootID)
to, match, found := c.results.longestResult(0, rootID)
if !found || !match || to < c.readOffset {
return ErrUnexpectedCharacter
}

10
node.go
View File

@ -83,8 +83,14 @@ func (n *Node) commit(t []rune) {
}
func (n *Node) String() string {
if n.From >= len(n.tokens) || n.To > len(n.tokens) {
return n.Name + ":invalid"
if n.From >= len(n.tokens) && n.To != n.From || n.To > len(n.tokens) {
return fmt.Sprintf(
"%s:invalid:%d:%d:%d",
n.Name,
len(n.tokens),
n.From,
n.To,
)
}
return fmt.Sprintf("%s:%d:%d:%s", n.Name, n.From, n.To, n.Text())

View File

@ -133,6 +133,34 @@ func TestRecursion(t *testing.T) {
},
}},
)
runTests(
t,
`A = "a" | A*`,
[]testItem{{
title: "recursive sequence in choice",
text: "aaaa",
ignorePosition: true,
node: &Node{
Name: "A",
Nodes: []*Node{{
Name: "A",
}, {
Name: "A",
Nodes: []*Node{{
Name: "A",
}, {
Name: "A",
Nodes: []*Node{{
Name: "A",
}, {
Name: "A",
}},
}},
}},
},
}},
)
}
func TestSequence(t *testing.T) {
@ -188,33 +216,24 @@ func TestSequence(t *testing.T) {
},
}},
)
}
func TestSequenceBug(t *testing.T) {
runTests(
t,
`A = "a" | A*`,
`a = "a"?; A = a | a*`,
[]testItem{{
title: "BUG: recursive sequence in choice",
title: "single or zero-or-more optional in choice",
text: "aaa",
ignorePosition: true,
node: &Node{
Name: "A",
Nodes: []*Node{{
Name: "A",
Name: "a",
}, {
Name: "A",
Nodes: []*Node{{
Name: "A",
Name: "a",
}, {
Name: "A",
}, {
Name: "A",
}},
}, {
Name: "A",
Name: "a",
}},
},
ignorePosition: true,
}},
)
}
@ -571,12 +590,14 @@ func TestUndefined(t *testing.T) {
n, err := s.Parse(bytes.NewBufferString("a = b"))
if err != nil {
t.Error(err)
return
}
stest := &Syntax{}
err = define(stest, n)
if err != nil {
t.Error(err)
return
}
if err := stest.Init(); err == nil {
@ -628,6 +649,22 @@ func TestEmpty(t *testing.T) {
},
}},
)
runTests(
t,
`a = [a]*; a':alias = a; a'' = a' [^a]*`,
[]testItem{{
title: "no a",
text: "b",
ignorePosition: true,
node: &Node{
Name: "a''",
Nodes: []*Node{{
Name: "a",
}},
},
}},
)
}
func TestCharAsRoot(t *testing.T) {

View File

@ -5,49 +5,75 @@ type results struct {
match [][]int
}
func (s *results) getMatch(offset, id int) (int, bool, bool) {
if len(s.noMatch) > offset && s.noMatch[offset] != nil && s.noMatch[offset].has(id) {
return 0, false, true
func (r *results) ensureOffset(offset int) {
if len(r.match) > offset {
return
}
if len(s.match) <= offset {
return 0, false, false
if cap(r.match) > offset {
r.match = r.match[:offset+1]
return
}
var (
found bool
to int
)
for i := 0; i < len(s.match[offset]); i += 2 {
if s.match[offset][i] != id {
continue
r.match = r.match[:cap(r.match)]
for i := len(r.match); i <= offset; i++ {
r.match = append(r.match, nil)
}
found = true
if s.match[offset][i+1] > to {
to = s.match[offset][i+1]
}
}
return to, found, found
}
func (s *results) hasMatchTo(offset, id, to int) bool {
if len(s.noMatch) > offset && s.noMatch[offset] != nil && s.noMatch[offset].has(id) {
return false
}
if len(s.match) <= offset {
return false
}
for i := 0; i < len(s.match[offset]); i += 2 {
if s.match[offset][i] != id {
func (r *results) setMatch(offset, id, to int) {
r.ensureOffset(offset)
for i := 0; i < len(r.match[offset]); i += 2 {
if r.match[offset][i] != id || r.match[offset][i+1] != to {
continue
}
if s.match[offset][i+1] == to {
return
}
r.match[offset] = append(r.match[offset], id, to)
}
func (r *results) setNoMatch(offset, id int) {
if len(r.match) > offset {
for i := 0; i < len(r.match[offset]); i += 2 {
if r.match[offset][i] != id {
continue
}
return
}
}
if len(r.noMatch) <= offset {
if cap(r.noMatch) > offset {
r.noMatch = r.noMatch[:offset+1]
} else {
r.noMatch = r.noMatch[:cap(r.noMatch)]
for i := cap(r.noMatch); i <= offset; i++ {
r.noMatch = append(r.noMatch, nil)
}
}
}
if r.noMatch[offset] == nil {
r.noMatch[offset] = &idSet{}
}
r.noMatch[offset].set(id)
}
func (r *results) hasMatchTo(offset, id, to int) bool {
if len(r.match) <= offset {
return false
}
for i := 0; i < len(r.match[offset]); i += 2 {
if r.match[offset][i] != id {
continue
}
if r.match[offset][i+1] == to {
return true
}
}
@ -55,91 +81,46 @@ func (s *results) hasMatchTo(offset, id, to int) bool {
return false
}
func (s *results) takeMatch(offset, id int) (int, bool) {
if len(s.match) <= offset {
func (r *results) longestMatch(offset, id int) (int, bool) {
if len(r.match) <= offset {
return 0, false
}
var (
found bool
to int
index int
)
for i := 0; i < len(s.match[offset]); i += 2 {
if s.match[offset][i] != id {
var found bool
to := -1
for i := 0; i < len(r.match[offset]); i += 2 {
if r.match[offset][i] != id {
continue
}
if s.match[offset][i+1] > to || !found {
to = s.match[offset][i+1]
index = i
if r.match[offset][i+1] > to {
to = r.match[offset][i+1]
}
found = true
}
if found && to-offset > 0 {
s.match[offset][index] = -1
}
return to, found
}
func (s *results) ensureOffset(offset int) {
if len(s.match) > offset {
return
func (r *results) longestResult(offset, id int) (int, bool, bool) {
if len(r.noMatch) > offset && r.noMatch[offset] != nil && r.noMatch[offset].has(id) {
return 0, false, true
}
if cap(s.match) > offset {
s.match = s.match[:offset+1]
return
}
s.match = s.match[:cap(s.match)]
for i := len(s.match); i <= offset; i++ {
s.match = append(s.match, nil)
}
to, ok := r.longestMatch(offset, id)
return to, ok, ok
}
func (s *results) setMatch(offset, id, to int) {
s.ensureOffset(offset)
for i := 0; i < len(s.match[offset]); i += 2 {
if s.match[offset][i] != id || s.match[offset][i+1] != to {
continue
}
return
}
s.match[offset] = append(s.match[offset], id, to)
}
func (s *results) setNoMatch(offset, id int) {
if len(s.match) > offset {
for i := 0; i < len(s.match[offset]); i += 2 {
if s.match[offset][i] != id {
func (r *results) dropMatchTo(offset, id, to int) {
for i := 0; i < len(r.match[offset]); i += 2 {
if r.match[offset][i] != id {
continue
}
if r.match[offset][i+1] == to {
r.match[offset][i] = -1
return
}
}
if len(s.noMatch) <= offset {
if cap(s.noMatch) > offset {
s.noMatch = s.noMatch[:offset+1]
} else {
s.noMatch = s.noMatch[:cap(s.noMatch)]
for i := cap(s.noMatch); i <= offset; i++ {
s.noMatch = append(s.noMatch, nil)
}
}
}
if s.noMatch[offset] == nil {
s.noMatch[offset] = &idSet{}
}
s.noMatch[offset].set(id)
}

View File

@ -15,9 +15,18 @@ type testItem struct {
ignorePosition bool
}
func runTestsSyntax(t *testing.T, s *Syntax, tests []testItem) {
func runTestsGetSyntax(t *testing.T, getSyntax func(t *testing.T) *Syntax, tests []testItem) {
var s *Syntax
for _, test := range tests {
t.Run(test.title, func(t *testing.T) {
if s == nil {
s = getSyntax(t)
}
if t.Failed() {
return
}
b := bytes.NewBufferString(test.text)
start := time.Now()
@ -43,22 +52,32 @@ func runTestsSyntax(t *testing.T, s *Syntax, tests []testItem) {
}
}
func runTestsSyntax(t *testing.T, s *Syntax, tests []testItem) {
runTestsGetSyntax(t, func(*testing.T) *Syntax { return s }, tests)
}
func runTests(t *testing.T, syntax string, tests []testItem) {
getSyntax := func(t *testing.T) *Syntax {
s, err := openSyntaxString(syntax)
if err != nil {
t.Error(err)
return
}
runTestsSyntax(t, s, tests)
return s
}
runTestsGetSyntax(t, getSyntax, tests)
}
func runTestsFile(t *testing.T, file string, tests []testItem) {
getSyntax := func(t *testing.T) *Syntax {
s, err := openSyntaxFile(file)
if err != nil {
t.Error(err)
return
}
runTestsSyntax(t, s, tests)
return s
}
runTestsGetSyntax(t, getSyntax, tests)
}

View File

@ -184,6 +184,8 @@ func (p *sequenceParser) parse(c *context) {
// TODO:
// - is it ok to parse before max range check? what if max=0
// - validate, normalize and document max=0
// TODO: test this f(g())
p.items[itemIndex].parse(c)
if !c.matchLast {
if currentCount < p.ranges[itemIndex][0] {
@ -232,18 +234,27 @@ func (b *sequenceBuilder) nodeName() string { return b.name }
func (b *sequenceBuilder) nodeID() int { return b.id }
func (b *sequenceBuilder) build(c *context) ([]*Node, bool) {
to, ok := c.results.takeMatch(c.offset, b.id)
to, ok := c.results.longestMatch(c.offset, b.id)
if !ok {
return nil, false
}
if to-c.offset == 0 && b.commit&Alias != 0 {
return nil, true
if c.buildPending(c.offset, b.id, to) {
return nil, false
}
c.markBuildPending(c.offset, b.id, to)
if to-c.offset > 0 {
c.results.dropMatchTo(c.offset, b.id, to)
}
if b.allChars {
from := c.offset
if b.allChars {
c.offset = to
c.unmarkBuildPending(from, b.id, to)
if b.commit&Alias != 0 {
return nil, true
}
@ -256,7 +267,6 @@ func (b *sequenceBuilder) build(c *context) ([]*Node, bool) {
}}, true
}
from := c.offset
var (
itemIndex int
currentCount int
@ -277,17 +287,32 @@ func (b *sequenceBuilder) build(c *context) ([]*Node, bool) {
}
parsed := c.offset > itemFrom
if parsed || len(n) > 0 {
if parsed {
nodes = append(nodes, n...)
currentCount++
}
if !parsed || b.ranges[itemIndex][1] >= 0 && currentCount == b.ranges[itemIndex][1] {
if !parsed {
if currentCount < b.ranges[itemIndex][0] {
for i := 0; i < b.ranges[itemIndex][0]-currentCount; i++ {
nodes = append(nodes, n...)
}
}
itemIndex++
currentCount = 0
continue
}
if b.ranges[itemIndex][1] >= 0 && currentCount == b.ranges[itemIndex][1] {
itemIndex++
currentCount = 0
}
}
c.unmarkBuildPending(from, b.id, to)
if b.commit&Alias != 0 {
return nodes, true
}

View File

@ -318,6 +318,8 @@ func (s *Syntax) Parse(r io.Reader) (*Node, error) {
}
c.offset = 0
c.resetPending()
n, ok := s.builder.build(c)
if !ok || len(n) != 1 {
panic("damaged parse result")

View File

@ -66,6 +66,7 @@ func applyWhitespaceToSeq(s *sequenceDefinition) []definition {
whitespace := SequenceItem{Name: whitespaceName, Min: 0, Max: -1}
for i, item := range s.items {
// TODO: there should not be max=0
if item.Max >= 0 && item.Max <= 1 {
if i > 0 {
items = append(items, whitespace)
@ -98,8 +99,13 @@ func applyWhitespaceToSeq(s *sequenceDefinition) []definition {
continue
}
optItems := []SequenceItem{singleItem, restItems}
if i > 0 {
optItems = []SequenceItem{whitespace, singleItem, restItems}
}
optName := patchName(item.Name, s.nodeName(), "wsopt", strconv.Itoa(i))
optDef := newSequence(optName, Alias, []SequenceItem{whitespace, singleItem, restItems})
optDef := newSequence(optName, Alias, optItems)
defs = append(defs, optDef)
items = append(items, SequenceItem{Name: optName, Min: 0, Max: 1})
}

View File

@ -8,9 +8,9 @@ const (
word-char:alias = [^\n, \t];
cell = (word-char (ws* word-char)*)?;
rest-cell:alias = "," ws* cell;
line = cell ws* (rest-cell (ws* rest-cell)*)?;
line = cell (ws* rest-cell (ws* rest-cell)*)?;
rest-line:alias = "\n" ws* line;
document = ws* (line ws* (rest-line (ws* rest-line)*)?)? ws*;
document = ws* (line (ws* rest-line (ws* rest-line)*)?)? ws*;
`
csvWithWhitespaceSupport = `
@ -24,6 +24,7 @@ const (
func TestCSVWhitespace(t *testing.T) {
tests := []testItem{{
title: "empty",
ignorePosition: true,
node: &Node{
Name: "document",
},