remove separate quantifier parser

This commit is contained in:
Arpad Ryszka 2017-06-25 23:38:32 +02:00
parent df6b9e99f4
commit b457d39101
12 changed files with 227 additions and 343 deletions

View File

@ -14,4 +14,4 @@ check: build
fmt: $(SOURCES) fmt: $(SOURCES)
@gofmt -w -s $(SOURCES) @gofmt -w -s $(SOURCES)
precommit: build check fmt precommit: fmt build check

48
boot.go
View File

@ -4,6 +4,7 @@ import (
"errors" "errors"
"os" "os"
"strconv" "strconv"
"strings"
) )
var errInvalidDefinition = errors.New("invalid syntax definition") var errInvalidDefinition = errors.New("invalid syntax definition")
@ -32,11 +33,6 @@ func checkBootDefinitionLength(d []string) error {
return errInvalidDefinition return errInvalidDefinition
} }
case "quantifier":
if len(d) != 6 {
return errInvalidDefinition
}
case "sequence", "choice": case "sequence", "choice":
if len(d) < 4 { if len(d) < 4 {
return errInvalidDefinition return errInvalidDefinition
@ -121,28 +117,42 @@ func defineBootCharSequence(s *Syntax, d []string) error {
return s.CharSequence(d[1], ct, chars) return s.CharSequence(d[1], ct, chars)
} }
func defineBootQuantifier(s *Syntax, d []string) error { func namesToSequenceItemsQuantify(n []string, quantify bool) []SequenceItem {
ct := stringToCommitType(d[2]) si := make([]SequenceItem, len(n))
for i, ni := range n {
var min, max int
if quantify {
nis := strings.Split(ni, ":")
if len(nis) == 3 {
ni = nis[0]
var ( var err error
min, max int
err error
)
if min, err = strconv.Atoi(d[4]); err != nil { min, err = strconv.Atoi(nis[1])
return err if err != nil {
panic(err)
}
max, err = strconv.Atoi(nis[2])
if err != nil {
panic(err)
}
}
}
si[i] = SequenceItem{Name: ni, Min: min, Max: max}
} }
if max, err = strconv.Atoi(d[5]); err != nil { return si
return err }
}
return s.Quantifier(d[1], ct, d[3], min, max) func namesToSequenceItems(n []string) []SequenceItem {
return namesToSequenceItemsQuantify(n, false)
} }
func defineBootSequence(s *Syntax, d []string) error { func defineBootSequence(s *Syntax, d []string) error {
ct := stringToCommitType(d[2]) ct := stringToCommitType(d[2])
return s.Sequence(d[1], ct, d[3:]...) return s.Sequence(d[1], ct, namesToSequenceItemsQuantify(d[3:], true)...)
} }
func defineBootChoice(s *Syntax, d []string) error { func defineBootChoice(s *Syntax, d []string) error {
@ -158,8 +168,6 @@ func defineBoot(s *Syntax, d []string) error {
return defineBootClass(s, d) return defineBootClass(s, d)
case "chars": case "chars":
return defineBootCharSequence(s, d) return defineBootCharSequence(s, d)
case "quantifier":
return defineBootQuantifier(s, d)
case "sequence": case "sequence":
return defineBootSequence(s, d) return defineBootSequence(s, d)
case "choice": case "choice":

View File

@ -7,7 +7,6 @@ import (
func TestBoot(t *testing.T) { func TestBoot(t *testing.T) {
var trace Trace var trace Trace
// trace = NewTrace(2)
b, err := initBoot(trace, bootDefinitions) b, err := initBoot(trace, bootDefinitions)
if err != nil { if err != nil {
@ -29,9 +28,11 @@ func TestBoot(t *testing.T) {
return return
} }
// trace = NewTrace(1)
s0 := NewSyntax(trace) s0 := NewSyntax(trace)
if err := define(s0, n0); err != nil { if err := define(s0, n0); err != nil {
t.Error(err) t.Error(err)
return
} }
_, err = f.Seek(0, 0) _, err = f.Seek(0, 0)
@ -40,6 +41,12 @@ func TestBoot(t *testing.T) {
return return
} }
err = s0.Init()
if err != nil {
t.Error(err)
return
}
n1, err := s0.Parse(f) n1, err := s0.Parse(f)
if err != nil { if err != nil {
t.Error(err) t.Error(err)

View File

@ -44,7 +44,7 @@ var bootDefinitions = [][]string{{
}, { }, {
"choice", "block-comment-char", "alias", "not-block-close", "not-star", "choice", "block-comment-char", "alias", "not-block-close", "not-star",
}, { }, {
"quantifier", "block-comment-body", "alias", "block-comment-char", "0", "-1", "sequence", "block-comment-body", "alias", "block-comment-char:0:-1",
}, { }, {
"sequence", "sequence",
"block-comment", "block-comment",
@ -53,15 +53,15 @@ var bootDefinitions = [][]string{{
"block-comment-body", "block-comment-body",
"close-block-comment", "close-block-comment",
}, { }, {
"quantifier", "not-nls", "alias", "not-nl", "0", "-1", "sequence", "not-nls", "alias", "not-nl:0:-1",
}, { }, {
"sequence", "line-comment", "alias", "double-slash", "not-nls", "sequence", "line-comment", "alias", "double-slash", "not-nls",
}, { }, {
"choice", "comment-segment", "alias", "block-comment", "line-comment", "choice", "comment-segment", "alias", "block-comment", "line-comment",
}, { }, {
"quantifier", "wss", "alias", "ws", "0", "-1", "sequence", "wss", "alias", "ws:0:-1",
}, { }, {
"quantifier", "optional-nl", "alias", "nl", "0", "1", "sequence", "optional-nl", "alias", "nl:0:1",
}, { }, {
"choice", "choice",
"ws-no-nl", "ws-no-nl",
@ -81,7 +81,7 @@ var bootDefinitions = [][]string{{
"ws-no-nl", "ws-no-nl",
"comment-segment", "comment-segment",
}, { }, {
"quantifier", "continue-comment", "alias", "continue-comment-segment", "0", "-1", "sequence", "continue-comment", "alias", "continue-comment-segment:0:-1",
}, { }, {
"sequence", "sequence",
"comment", "comment",
@ -91,7 +91,7 @@ var bootDefinitions = [][]string{{
}, { }, {
"choice", "wsc", "alias", "ws", "comment", "choice", "wsc", "alias", "ws", "comment",
}, { }, {
"quantifier", "wscs", "alias", "wsc", "0", "-1", "sequence", "wscs", "alias", "wsc:0:-1",
}, { }, {
"anything", "anything", "alias", "anything", "anything", "alias",
}, { }, {
@ -105,7 +105,7 @@ var bootDefinitions = [][]string{{
}, { }, {
"chars", "dash", "alias", "-", "chars", "dash", "alias", "-",
}, { }, {
"quantifier", "optional-class-not", "alias", "class-not", "0", "1", "sequence", "optional-class-not", "alias", "class-not:0:1",
}, { }, {
"class", "not-class-control", "alias", "^\\\\\\[\\]\\^\\-", "class", "not-class-control", "alias", "^\\\\\\[\\]\\^\\-",
}, { }, {
@ -119,7 +119,7 @@ var bootDefinitions = [][]string{{
}, { }, {
"choice", "char-or-range", "alias", "class-char", "char-range", "choice", "char-or-range", "alias", "class-char", "char-range",
}, { }, {
"quantifier", "chars-or-ranges", "alias", "char-or-range", "0", "-1", "sequence", "chars-or-ranges", "alias", "char-or-range:0:-1",
}, { }, {
"sequence", "char-class", "none", "open-square", "optional-class-not", "chars-or-ranges", "close-square", "sequence", "char-class", "none", "open-square", "optional-class-not", "chars-or-ranges", "close-square",
}, { }, {
@ -129,7 +129,7 @@ var bootDefinitions = [][]string{{
}, { }, {
"choice", "sequence-char", "none", "not-char-sequence-control", "escaped-char", "choice", "sequence-char", "none", "not-char-sequence-control", "escaped-char",
}, { }, {
"quantifier", "char-sequence-chars", "alias", "sequence-char", "0", "-1", "sequence", "char-sequence-chars", "alias", "sequence-char:0:-1",
}, { }, {
"sequence", "char-sequence", "none", "double-quote", "char-sequence-chars", "double-quote", "sequence", "char-sequence", "none", "double-quote", "char-sequence-chars", "double-quote",
}, { }, {
@ -137,7 +137,7 @@ var bootDefinitions = [][]string{{
}, { }, {
"class", "symbol-char", "alias", "^\\\\ \\n\\t\\b\\f\\r\\v\\b/.\\[\\]\\\"{}\\^+*?|():=;", "class", "symbol-char", "alias", "^\\\\ \\n\\t\\b\\f\\r\\v\\b/.\\[\\]\\\"{}\\^+*?|():=;",
}, { }, {
"quantifier", "symbol-chars", "alias", "symbol-char", "1", "-1", "sequence", "symbol-chars", "alias", "symbol-char:1:-1",
}, { }, {
"sequence", "symbol", "none", "symbol-chars", "sequence", "symbol", "none", "symbol-chars",
}, { }, {
@ -153,7 +153,7 @@ var bootDefinitions = [][]string{{
}, { }, {
"class", "digit", "alias", "0-9", "class", "digit", "alias", "0-9",
}, { }, {
"quantifier", "number", "alias", "digit", "1", "-1", "sequence", "number", "alias", "digit:1:-1",
}, { }, {
"sequence", "count", "none", "number", "sequence", "count", "none", "number",
}, { }, {
@ -194,23 +194,23 @@ var bootDefinitions = [][]string{{
}, { }, {
"choice", "quantifiable", "alias", "terminal", "symbol", "group", "choice", "quantifiable", "alias", "terminal", "symbol", "group",
}, { }, {
"sequence", "quantifier", "none", "quantifiable", "wscs", "quantity", "choice", "item-choice", "alias", "terminal", "symbol", "group",
}, { }, {
"choice", "item", "alias", "terminal", "symbol", "group", "quantifier", "sequence", "item", "none", "item-choice", "quantity:0:1",
}, { }, {
"sequence", "item-continue", "alias", "wscs", "item", "sequence", "item-continue", "alias", "wscs", "item",
}, { }, {
"quantifier", "items-continue", "alias", "item-continue", "0", "-1", "sequence", "items-continue", "alias", "item-continue:0:-1",
}, { }, {
"sequence", "sequence", "none", "item", "items-continue", "sequence", "sequence", "none", "item", "items-continue",
}, { }, {
"choice", "element", "alias", "terminal", "symbol", "group", "quantifier", "sequence", "choice", "element", "alias", "terminal", "symbol", "group", "sequence",
}, { }, {
"chars", "pipe", "alias", "|", "chars", "pipe", "alias", "|",
}, { }, {
"sequence", "element-continue", "alias", "wscs", "pipe", "wscs", "element", "sequence", "element-continue", "alias", "wscs", "pipe", "wscs", "element",
}, { }, {
"quantifier", "elements-continue", "alias", "element-continue", "1", "-1", "sequence", "elements-continue", "alias", "element-continue:1:-1",
}, { }, {
"sequence", "choice", "none", "element", "elements-continue", "sequence", "choice", "none", "element", "elements-continue",
}, { }, {
@ -220,7 +220,6 @@ var bootDefinitions = [][]string{{
"terminal", "terminal",
"symbol", "symbol",
"group", "group",
"quantifier",
"sequence", "sequence",
"choice", "choice",
}, { }, {
@ -236,7 +235,7 @@ var bootDefinitions = [][]string{{
}, { }, {
"sequence", "flag-tag", "alias", "colon", "flag", "sequence", "flag-tag", "alias", "colon", "flag",
}, { }, {
"quantifier", "flags", "alias", "flag-tag", "0", "-1", "sequence", "flags", "alias", "flag-tag:0:-1",
}, { }, {
"chars", "equal", "alias", "=", "chars", "equal", "alias", "=",
}, { }, {
@ -246,7 +245,7 @@ var bootDefinitions = [][]string{{
}, { }, {
"choice", "wsc-or-semicolon", "alias", "wsc", "semicolon", "choice", "wsc-or-semicolon", "alias", "wsc", "semicolon",
}, { }, {
"quantifier", "wsc-or-semicolons", "alias", "wsc-or-semicolon", "0", "-1", "sequence", "wsc-or-semicolons", "alias", "wsc-or-semicolon:0:-1",
}, { }, {
"sequence", "sequence",
"subsequent-definition", "subsequent-definition",
@ -256,12 +255,10 @@ var bootDefinitions = [][]string{{
"wsc-or-semicolons", "wsc-or-semicolons",
"definition", "definition",
}, { }, {
"quantifier", "sequence",
"subsequent-definitions", "subsequent-definitions",
"alias", "alias",
"subsequent-definition", "subsequent-definition:0:-1",
"0",
"-1",
}, { }, {
"sequence", "sequence",
"definitions", "definitions",
@ -269,12 +266,10 @@ var bootDefinitions = [][]string{{
"definition", "definition",
"subsequent-definitions", "subsequent-definitions",
}, { }, {
"quantifier", "sequence",
"opt-definitions", "opt-definitions",
"alias", "alias",
"definitions", "definitions:0:1",
"0",
"1",
}, { }, {
"sequence", "sequence",
"syntax", "syntax",

28
char.go
View File

@ -1,9 +1,10 @@
package parse package parse
// TODO: rename to token
type charParser struct { type charParser struct {
name string name string
commit CommitType commit CommitType
any bool
not bool not bool
chars []rune chars []rune
ranges [][]rune ranges [][]rune
@ -13,14 +14,13 @@ type charParser struct {
func newChar( func newChar(
name string, name string,
ct CommitType, ct CommitType,
any, not bool, not bool,
chars []rune, chars []rune,
ranges [][]rune, ranges [][]rune,
) *charParser { ) *charParser {
return &charParser{ return &charParser{
name: name, name: name,
commit: ct, commit: ct,
any: any,
not: not, not: not,
chars: chars, chars: chars,
ranges: ranges, ranges: ranges,
@ -31,7 +31,11 @@ func (p *charParser) nodeName() string { return p.name }
func (p *charParser) parser(r *registry, path []string) (parser, error) { func (p *charParser) parser(r *registry, path []string) (parser, error) {
if stringsContain(path, p.name) { if stringsContain(path, p.name) {
panic(errCannotIncludeParsers) panic(cannotIncludeParsers(p.name))
}
if _, ok := r.parser(p.name); ok {
return p, nil
} }
r.setParser(p) r.setParser(p)
@ -42,23 +46,19 @@ func (p *charParser) commitType() CommitType {
return p.commit return p.commit
} }
func (p *charParser) setIncludedBy(i parser, path []string) { func (p *charParser) setIncludedBy(including parser, path []string) {
if stringsContain(path, p.name) { if stringsContain(path, p.name) {
panic(errCannotIncludeParsers) panic(cannotIncludeParsers(p.name))
} }
p.includedBy = append(p.includedBy, i) p.includedBy = append(p.includedBy, including)
} }
func (p *charParser) cacheIncluded(*context, *Node) { func (p *charParser) cacheIncluded(*context, *Node) {
panic(errCannotIncludeParsers) panic(cannotIncludeParsers(p.name))
} }
func (p *charParser) match(t rune) bool { func (p *charParser) match(t rune) bool {
if p.any {
return true
}
for _, ci := range p.chars { for _, ci := range p.chars {
if ci == t { if ci == t {
return !p.not return !p.not
@ -93,8 +93,8 @@ func (p *charParser) parse(t Trace, c *context) {
t.Out1("success", string(tok)) t.Out1("success", string(tok))
n := newNode(p.name, p.commit, c.offset, c.offset+1) n := newNode(p.name, p.commit, c.offset, c.offset+1)
c.cache.set(c.offset, p.name, n) c.cache.set(c.offset, p.name, n)
for _, i := range p.includedBy { for _, including := range p.includedBy {
i.cacheIncluded(c, n) including.cacheIncluded(c, n)
} }
c.success(n) c.success(n)

View File

@ -106,19 +106,25 @@ func nodeChar(n *Node) rune {
return toRune(s) return toRune(s)
} }
// defineMember resolves a single member node of a compound expression to the
// name it should be referenced by.
//
// A "symbol" node already refers to a named definition, so its text is
// returned as the reference and nothing is registered. Any other node is
// registered as an Alias expression under defaultName, and defaultName is
// returned together with the error (if any) from that registration.
func defineMember(s *Syntax, defaultName string, n *Node) (string, error) {
	if n.Name == "symbol" {
		return n.Text(), nil
	}

	err := defineExpression(s, defaultName, Alias, n)
	return defaultName, err
}
func defineMembers(s *Syntax, name string, n ...*Node) ([]string, error) { func defineMembers(s *Syntax, name string, n ...*Node) ([]string, error) {
var refs []string var refs []string
for i, ni := range n { for i, ni := range n {
nmi := childName(name, i) nmi := childName(name, i)
switch ni.Name { ref, err := defineMember(s, nmi, ni)
case "symbol": if err != nil {
refs = append(refs, ni.Text()) return nil, err
default:
refs = append(refs, nmi)
if err := defineExpression(s, nmi, Alias, ni); err != nil {
return nil, err
}
} }
refs = append(refs, ref)
} }
return refs, nil return refs, nil
@ -156,38 +162,33 @@ func defineCharSequence(s *Syntax, name string, ct CommitType, charNodes []*Node
return s.CharSequence(name, ct, chars) return s.CharSequence(name, ct, chars)
} }
func defineQuantifier(s *Syntax, name string, ct CommitType, n *Node, q *Node) error { func getQuantity(n *Node) (min int, max int, err error) {
refs, err := defineMembers(s, name, n) switch n.Name {
if err != nil {
return err
}
var min, max int
switch q.Name {
case "count-quantifier": case "count-quantifier":
min, err = strconv.Atoi(q.Nodes[0].Text()) min, err = strconv.Atoi(n.Nodes[0].Text())
if err != nil { if err != nil {
return err return
} }
max = min max = min
case "range-quantifier": case "range-quantifier":
min = 0 min = 0
max = -1 max = -1
for _, rq := range q.Nodes { for _, rq := range n.Nodes {
switch rq.Name { switch rq.Name {
case "range-from": case "range-from":
min, err = strconv.Atoi(rq.Text()) min, err = strconv.Atoi(rq.Text())
if err != nil { if err != nil {
return err return
} }
case "range-to": case "range-to":
max, err = strconv.Atoi(rq.Text()) max, err = strconv.Atoi(rq.Text())
if err != nil { if err != nil {
return err return
} }
default: default:
return ErrInvalidSyntax err = ErrInvalidSyntax
return
} }
} }
case "one-or-more": case "one-or-more":
@ -198,23 +199,42 @@ func defineQuantifier(s *Syntax, name string, ct CommitType, n *Node, q *Node) e
min, max = 0, 1 min, max = 0, 1
} }
return s.Quantifier(name, ct, refs[0], min, max) return
}
func defineSymbol(s *Syntax, name string, ct CommitType, n *Node) error {
return s.Sequence(name, ct, SequenceItem{Name: n.Text()})
} }
func defineSequence(s *Syntax, name string, ct CommitType, n ...*Node) error { func defineSequence(s *Syntax, name string, ct CommitType, n ...*Node) error {
refs, err := defineMembers(s, name, n...) var items []SequenceItem
if err != nil { for i, ni := range n {
return err if ni.Name != "item" || len(ni.Nodes) == 0 {
return ErrInvalidSyntax
}
var (
item SequenceItem
err error
)
defaultName := childName(name, i)
item.Name, err = defineMember(s, defaultName, ni.Nodes[0])
if err != nil {
return err
}
if len(ni.Nodes) == 2 {
item.Min, item.Max, err = getQuantity(ni.Nodes[1])
if err != nil {
return err
}
}
items = append(items, item)
} }
// // TODO: try to make this expressed in the syntax (maybe as sequences need either a quantifier or not return s.Sequence(name, ct, items...)
// // one item? or by maintaining the excluded and caching in the sequence in a similar way when there is
// // only one item?) how does this affect the quantifiers?
// if len(refs) == 1 {
// return s.Choice(name, ct, refs[0])
// }
return s.Sequence(name, ct, refs...)
} }
func defineChoice(s *Syntax, name string, ct CommitType, n ...*Node) error { func defineChoice(s *Syntax, name string, ct CommitType, n ...*Node) error {
@ -236,9 +256,7 @@ func defineExpression(s *Syntax, name string, ct CommitType, expression *Node) e
case "char-sequence": case "char-sequence":
err = defineCharSequence(s, name, ct, expression.Nodes) err = defineCharSequence(s, name, ct, expression.Nodes)
case "symbol": case "symbol":
err = defineSequence(s, name, ct, expression) err = defineSymbol(s, name, ct, expression)
case "quantifier":
err = defineQuantifier(s, name, ct, expression.Nodes[0], expression.Nodes[1])
case "sequence": case "sequence":
err = defineSequence(s, name, ct, expression.Nodes...) err = defineSequence(s, name, ct, expression.Nodes...)
case "choice": case "choice":

View File

@ -1,9 +1,6 @@
package parse package parse
import ( import "fmt"
"errors"
"fmt"
)
type definition interface { type definition interface {
nodeName() string nodeName() string
@ -18,12 +15,14 @@ type parser interface {
parse(Trace, *context) parse(Trace, *context)
} }
var errCannotIncludeParsers = errors.New("cannot include parsers")
func parserNotFound(name string) error { func parserNotFound(name string) error {
return fmt.Errorf("parser not found: %s", name) return fmt.Errorf("parser not found: %s", name)
} }
// cannotIncludeParsers returns an error stating that the parser with the
// given name cannot include other parsers.
func cannotIncludeParsers(name string) error {
	const format = "parser: %s cannot include other parsers"
	return fmt.Errorf(format, name)
}
func stringsContain(ss []string, s string) bool { func stringsContain(ss []string, s string) bool {
for _, si := range ss { for _, si := range ss {
if si == s { if si == s {

View File

@ -358,9 +358,10 @@ func TestSequence(t *testing.T) {
}}, }},
) )
testString( testStringTrace(
t, t,
`A = "a" | (A?)*`, `A = "a" | (A?)*`,
1,
[]testItem{{ []testItem{{
msg: "sequence in choice with redundant quantifier", msg: "sequence in choice with redundant quantifier",
text: "aaa", text: "aaa",
@ -397,15 +398,15 @@ func TestQuantifiers(t *testing.T) {
t, t,
`A = "a" "b"{0} "a"`, `A = "a" "b"{0} "a"`,
[]testItem{{ []testItem{{
msg: "zero", msg: "zero, considered as one",
text: "aa", text: "aba",
node: &Node{ node: &Node{
Name: "A", Name: "A",
to: 2, to: 3,
}, },
}, { }, {
msg: "zero, fail", msg: "zero, fail",
text: "aba", text: "aa",
fail: true, fail: true,
}}, }},
) )
@ -642,10 +643,9 @@ func TestQuantifiers(t *testing.T) {
}}, }},
) )
testStringTrace( testString(
t, t,
`A = "a" "b"{0,} "a"`, `A = "a" "b"{0,} "a"`,
1,
[]testItem{{ []testItem{{
msg: "zero or more, explicit, missing", msg: "zero or more, explicit, missing",
text: "aa", text: "aa",
@ -663,10 +663,9 @@ func TestQuantifiers(t *testing.T) {
}}, }},
) )
testStringTrace( testString(
t, t,
`A = "a" "b"* "a"`, `A = "a" "b"* "a"`,
1,
[]testItem{{ []testItem{{
msg: "zero or more, shortcut, missing", msg: "zero or more, shortcut, missing",
text: "aa", text: "aa",
@ -684,10 +683,9 @@ func TestQuantifiers(t *testing.T) {
}}, }},
) )
testStringTrace( testString(
t, t,
`A = "a" "b"{1,} "a"`, `A = "a" "b"{1,} "a"`,
1,
[]testItem{{ []testItem{{
msg: "one or more, explicit, missing", msg: "one or more, explicit, missing",
text: "aa", text: "aa",
@ -702,10 +700,9 @@ func TestQuantifiers(t *testing.T) {
}}, }},
) )
testStringTrace( testString(
t, t,
`A = "a" "b"+ "a"`, `A = "a" "b"+ "a"`,
1,
[]testItem{{ []testItem{{
msg: "one or more, shortcut, missing", msg: "one or more, shortcut, missing",
text: "aa", text: "aa",
@ -720,10 +717,9 @@ func TestQuantifiers(t *testing.T) {
}}, }},
) )
testStringTrace( testString(
t, t,
`A = "a" "b"{3,} "a"`, `A = "a" "b"{3,} "a"`,
1,
[]testItem{{ []testItem{{
msg: "three or more, explicit, missing", msg: "three or more, explicit, missing",
text: "abba", text: "abba",

View File

@ -1,172 +0,0 @@
package parse
// quantifierDefinition is the registered, not yet resolved form of a
// quantifier: it refers to the repeated item by name and is turned into a
// quantifierParser by its parser method.
type quantifierDefinition struct {
name string // name of the definition, reported by nodeName
commit CommitType // commit flags, copied to the resulting parser
min, max int // repetition bounds, copied to the resulting parser
item string // name of the repeated item, looked up in the registry
}
// quantifierParser is the resolved form of a quantifierDefinition: it holds
// the parser of the repeated item instead of its name.
type quantifierParser struct {
name string // name used for the produced nodes, cache entries and exclusion
commit CommitType // commit flags passed to the produced nodes
min, max int // repetition bounds; parse treats a negative max as "no upper bound"
item parser // parser of the repeated item
includedBy []parser // parsers registered via setIncludedBy, notified when parse succeeds
}
// newQuantifier creates the definition of a quantifier called name that
// repeats the definition called item between min and max times, carrying the
// commit type ct.
func newQuantifier(name string, ct CommitType, item string, min, max int) *quantifierDefinition {
	d := new(quantifierDefinition)
	d.name, d.commit, d.item = name, ct, item
	d.min, d.max = min, max
	return d
}
// nodeName returns the name the definition was created with.
func (d *quantifierDefinition) nodeName() string { return d.name }
// parser returns the quantifierParser for this definition, creating and
// registering it in r on first use.
//
// It panics with errCannotIncludeParsers when the definition's own name is
// already on path. When the repeated item has neither a parser nor a
// definition in the registry, parserNotFound is returned.
func (d *quantifierDefinition) parser(r *registry, path []string) (parser, error) {
	if stringsContain(path, d.name) {
		panic(errCannotIncludeParsers)
	}

	if existing, found := r.parser(d.name); found {
		return existing, nil
	}

	// The parser is registered before its item is resolved, so a lookup of
	// d.name made while resolving the item finds this instance.
	qp := &quantifierParser{name: d.name, commit: d.commit, min: d.min, max: d.max}
	r.setParser(qp)

	if known, found := r.parser(d.item); found {
		qp.item = known
		return qp, nil
	}

	itemDefinition, found := r.definition(d.item)
	if !found {
		return nil, parserNotFound(d.item)
	}

	built, err := itemDefinition.parser(r, path)
	if err != nil {
		return nil, err
	}

	qp.item = built
	return qp, nil
}
// commitType returns the commit type the definition was created with.
func (d *quantifierDefinition) commitType() CommitType { return d.commit }
// nodeName returns the name of the parser, which is also the name given to
// the nodes it produces.
func (p *quantifierParser) nodeName() string { return p.name }
// TODO: merge the quantifier into the sequence
// DOC: sequences are hungry and are not revisited, a*a cannot match anything.
// DOC: how to match a trailing a? (..)*a | .(..)*a
// setIncludedBy records i as a parser to be notified, through cacheIncluded,
// whenever this quantifier parses successfully. It panics with
// errCannotIncludeParsers when the quantifier's own name is already on path.
func (p *quantifierParser) setIncludedBy(i parser, path []string) {
	if onPath := stringsContain(path, p.name); onPath {
		panic(errCannotIncludeParsers)
	}

	extended := append(p.includedBy, i)
	p.includedBy = extended
}
// cacheIncluded always panics with errCannotIncludeParsers: a quantifier is
// never expected to be notified about the result of an included parser.
func (p *quantifierParser) cacheIncluded(*context, *Node) {
panic(errCannotIncludeParsers)
}
// parse applies p.item repeatedly starting at c.offset and collects the
// matches in a new node named p.name.
//
// The loop ends when node.nodeLength() reaches p.max (only checked when
// p.max is not negative), or when the item no longer matches or matches with
// a token length of zero. At that point the result is a success if
// node.nodeLength() is at least p.min and a failure otherwise. Either way the
// outcome is stored in c.cache under the starting offset and p.name; on
// success every parser in p.includedBy is notified through cacheIncluded.
func (p *quantifierParser) parse(t Trace, c *context) {
t = t.Extend(p.name)
t.Out1("parsing quantifier", c.offset)
// parsers flagged as Documentation never match
if p.commit&Documentation != 0 {
t.Out1("fail, doc")
c.fail(c.offset)
return
}
// presumably guards against re-entering this parser at the same offset
// (e.g. through recursion) — confirm against context.excluded
if c.excluded(c.offset, p.name) {
t.Out1("excluded")
c.fail(c.offset)
return
}
c.exclude(c.offset, p.name)
// the arguments of the deferred call are evaluated here, so the exclusion is
// lifted for the starting offset even if c.offset changes during parsing
defer c.include(c.offset, p.name)
node := newNode(p.name, p.commit, c.offset, c.offset)
// this way of checking the cache definitely needs the testing of the russ cox form
for {
// upper bound reached: succeed without trying the item again
if p.max >= 0 && node.nodeLength() == p.max {
t.Out1("success, max reached")
c.cache.set(node.from, p.name, node)
for _, i := range p.includedBy {
i.cacheIncluded(c, node)
}
c.success(node)
return
}
t.Out2("next quantifier item")
// n, m, ok := c.cache.get(c.offset, p.item.nodeName())
// ok reports whether a result for the item was found through c.fromCache,
// m whether that result was a match
m, ok := c.fromCache(p.item.nodeName())
if ok {
t.Out1("quantifier item found in cache, match:", m, c.offset, c.node.tokenLength())
if m {
// NOTE(review): a cached match is appended before its token length is
// checked, while a freshly parsed zero-length match below ends the loop
// without being appended — confirm whether this difference is intended
node.append(c.node)
if c.node.tokenLength() > 0 {
t.Out2("taking next after cached found")
continue
}
}
// cached miss or cached zero-length match: no more repetitions
if node.nodeLength() >= p.min {
t.Out1("success, no more match")
c.cache.set(node.from, p.name, node)
for _, i := range p.includedBy {
i.cacheIncluded(c, node)
}
c.success(node)
} else {
t.Out1("fail, min not reached")
c.cache.set(node.from, p.name, nil)
c.fail(node.from)
}
return
}
// nothing cached for the item at this offset: run its parser
p.item.parse(t, c)
// a failed or zero-length match ends the repetition
if !c.match || c.node.tokenLength() == 0 {
if node.nodeLength() >= p.min {
t.Out1("success, no more match")
c.cache.set(node.from, p.name, node)
for _, i := range p.includedBy {
i.cacheIncluded(c, node)
}
c.success(node)
} else {
t.Out1("fail, min not reached")
c.cache.set(node.from, p.name, nil)
c.fail(node.from)
}
return
}
node.append(c.node)
}
}

View File

@ -3,17 +3,18 @@ package parse
type sequenceDefinition struct { type sequenceDefinition struct {
name string name string
commit CommitType commit CommitType
items []string items []SequenceItem
} }
type sequenceParser struct { type sequenceParser struct {
name string name string
commit CommitType commit CommitType
items []parser items []parser
ranges [][]int
including []parser including []parser
} }
func newSequence(name string, ct CommitType, items []string) *sequenceDefinition { func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefinition {
return &sequenceDefinition{ return &sequenceDefinition{
name: name, name: name,
commit: ct, commit: ct,
@ -25,7 +26,7 @@ func (d *sequenceDefinition) nodeName() string { return d.name }
func (d *sequenceDefinition) parser(r *registry, path []string) (parser, error) { func (d *sequenceDefinition) parser(r *registry, path []string) (parser, error) {
if stringsContain(path, d.name) { if stringsContain(path, d.name) {
panic(errCannotIncludeParsers) panic(cannotIncludeParsers(d.name))
} }
p, ok := r.parser(d.name) p, ok := r.parser(d.name)
@ -40,34 +41,47 @@ func (d *sequenceDefinition) parser(r *registry, path []string) (parser, error)
r.setParser(sp) r.setParser(sp)
var items []parser var (
items []parser
ranges [][]int
)
path = append(path, d.name) path = append(path, d.name)
for _, name := range d.items { for _, item := range d.items {
item, ok := r.parser(name) if item.Min == 0 && item.Max == 0 {
item.Min, item.Max = 1, 1
} else if item.Max == 0 {
item.Max = -1
}
pi, ok := r.parser(item.Name)
if ok { if ok {
items = append(items, item) items = append(items, pi)
ranges = append(ranges, []int{item.Min, item.Max})
continue continue
} }
itemDefinition, ok := r.definition(name) itemDefinition, ok := r.definition(item.Name)
if !ok { if !ok {
return nil, parserNotFound(name) return nil, parserNotFound(item.Name)
} }
item, err := itemDefinition.parser(r, path) pi, err := itemDefinition.parser(r, path)
if err != nil { if err != nil {
return nil, err return nil, err
} }
items = append(items, item) items = append(items, pi)
ranges = append(ranges, []int{item.Min, item.Max})
} }
// for single items, acts like a choice // for single items, acts like a choice
if len(items) == 1 { if len(items) == 1 && ranges[0][0] == 1 && ranges[0][1] == 1 {
items[0].setIncludedBy(sp, path) items[0].setIncludedBy(sp, path)
} }
sp.items = items sp.items = items
sp.ranges = ranges
return sp, nil return sp, nil
} }
@ -112,6 +126,8 @@ e = b | d
input: 111 input: 111
*/ */
// TODO: apply the quantifier migration to the syntax
func (p *sequenceParser) parse(t Trace, c *context) { func (p *sequenceParser) parse(t Trace, c *context) {
t = t.Extend(p.name) t = t.Extend(p.name)
t.Out1("parsing sequence", c.offset) t.Out1("parsing sequence", c.offset)
@ -122,8 +138,6 @@ func (p *sequenceParser) parse(t Trace, c *context) {
return return
} }
// TODO: maybe we can check the cache here? no because that would exclude the continuations
if c.excluded(c.offset, p.name) { if c.excluded(c.offset, p.name) {
t.Out1("excluded") t.Out1("excluded")
c.fail(c.offset) c.fail(c.offset)
@ -134,54 +148,75 @@ func (p *sequenceParser) parse(t Trace, c *context) {
defer c.include(c.offset, p.name) defer c.include(c.offset, p.name)
items := p.items items := p.items
ranges := p.ranges
var currentCount int
node := newNode(p.name, p.commit, c.offset, c.offset) node := newNode(p.name, p.commit, c.offset, c.offset)
for len(items) > 0 { for len(items) > 0 {
t.Out2("next sequence item")
// n, m, ok := c.cache.get(c.offset, items[0].nodeName())
m, ok := c.fromCache(items[0].nodeName()) m, ok := c.fromCache(items[0].nodeName())
if ok { if ok {
t.Out1("sequence item found in cache, match:", m, items[0].nodeName(), c.offset) t.Out1("sequence item found in cache, match:", m, items[0].nodeName(), c.offset)
if m { if m {
t.Out2("sequence item from cache:", c.node.Name, len(c.node.Nodes), c.node.from) if c.node.tokenLength() > 0 {
node.append(c.node) node.append(c.node)
items = items[1:] currentCount++
}
if c.node.tokenLength() == 0 || ranges[0][1] >= 0 && currentCount == ranges[0][1] {
items = items[1:]
ranges = ranges[1:]
currentCount = 0
}
continue continue
} }
c.cache.set(node.from, p.name, nil) if currentCount < ranges[0][0] {
c.fail(node.from) c.cache.set(node.from, p.name, nil)
return c.fail(node.from)
return
}
items = items[1:]
ranges = ranges[1:]
currentCount = 0
continue
} }
items[0].parse(t, c) items[0].parse(t, c)
items = items[1:]
if !c.match { if !c.match {
t.Out1("fail, item failed") if currentCount < ranges[0][0] {
c.cache.set(node.from, p.name, nil) t.Out1("fail, item failed")
c.fail(node.from) c.cache.set(node.from, p.name, nil)
return c.fail(node.from)
return
}
items = items[1:]
ranges = ranges[1:]
currentCount = 0
continue
} }
if c.node.tokenLength() > 0 { if c.node.tokenLength() > 0 {
t.Out2("appending sequence item", c.node.Name, len(c.node.Nodes))
node.append(c.node) node.append(c.node)
currentCount++
}
if c.node.tokenLength() == 0 || ranges[0][1] >= 0 && currentCount == ranges[0][1] {
items = items[1:]
ranges = ranges[1:]
currentCount = 0
} }
} }
t.Out1("success, items parsed") t.Out1("success, items parsed")
t.Out2("nodes", node.nodeLength())
if node.Name == "group" {
t.Out2("caching group", node.from, node.Nodes[2].Name, node.Nodes[2].nodeLength())
}
// is this cached item ever taken?
c.cache.set(node.from, p.name, node) c.cache.set(node.from, p.name, node)
for _, i := range p.including { for _, i := range p.including {
i.cacheIncluded(c, node) i.cacheIncluded(c, node)
} }
t.Out2("caching sequence and included by done")
c.success(node) c.success(node)
} }

View File

@ -16,6 +16,11 @@ const (
Root Root
) )
// SequenceItem references a definition by name as one item of a sequence,
// together with the bounds on how many times the item may be repeated.
type SequenceItem struct {
// Name is the name of the referenced definition.
Name string
// Min and Max are the repetition bounds. The pair 0,0 is treated as 1,1
// (exactly once) and x,0 is treated as x,-1; a negative Max means that
// there is no upper bound.
Min, Max int // 0,0 considered as 1,1, x,0 considered as x,-1
}
type Syntax struct { type Syntax struct {
trace Trace trace Trace
registry *registry registry *registry
@ -68,11 +73,11 @@ func (s *Syntax) register(d definition) error {
} }
func (s *Syntax) AnyChar(name string, ct CommitType) error { func (s *Syntax) AnyChar(name string, ct CommitType) error {
return s.register(newChar(name, ct, true, false, nil, nil)) return s.Class(name, ct, true, nil, nil)
} }
func (s *Syntax) Class(name string, ct CommitType, not bool, chars []rune, ranges [][]rune) error { func (s *Syntax) Class(name string, ct CommitType, not bool, chars []rune, ranges [][]rune) error {
return s.register(newChar(name, ct, false, not, chars, ranges)) return s.register(newChar(name, ct, not, chars, ranges))
} }
func childName(name string, childIndex int) string { func childName(name string, childIndex int) string {
@ -84,19 +89,15 @@ func (s *Syntax) CharSequence(name string, ct CommitType, chars []rune) error {
for i, ci := range chars { for i, ci := range chars {
ref := childName(name, i) ref := childName(name, i)
refs = append(refs, ref) refs = append(refs, ref)
if err := s.register(newChar(ref, Alias, false, false, []rune{ci}, nil)); err != nil { if err := s.register(newChar(ref, Alias, false, []rune{ci}, nil)); err != nil {
return err return err
} }
} }
return s.Sequence(name, ct, refs...) return s.Sequence(name, ct, namesToSequenceItems(refs)...)
} }
func (s *Syntax) Quantifier(name string, ct CommitType, item string, min, max int) error { func (s *Syntax) Sequence(name string, ct CommitType, items ...SequenceItem) error {
return s.register(newQuantifier(name, ct, item, min, max))
}
func (s *Syntax) Sequence(name string, ct CommitType, items ...string) error {
return s.register(newSequence(name, ct, items)) return s.register(newSequence(name, ct, items))
} }

View File

@ -49,22 +49,19 @@ quantity:alias = count-quantifier
| zero-or-more | zero-or-more
| zero-or-one; | zero-or-one;
quantifier = (terminal | symbol | group) wsc* quantity; item = (terminal | symbol | group) quantity?;
sequence = item (wsc* item)*; // TODO: why was this '+'?
item:alias = terminal | symbol | group | quantifier; element:alias = terminal | symbol | group | sequence;
sequence = item (wsc* item)+;
element:alias = terminal | symbol | group | quantifier | sequence;
// DOC: once cached, doesn't try again, even in a new context, therefore the order may matter // DOC: once cached, doesn't try again, even in a new context, therefore the order may matter
choice = element (wsc* "|" wsc* element)+; choice = element (wsc* "|" wsc* element)+;
// DOC: not having 'not' needs some tricks sometimes // DOC: not having 'not' needs some tricks sometimes
expression:alias = terminal expression:alias = terminal
| symbol | symbol
| group | group
| quantifier
| sequence | sequence
| choice; | choice;