remove separate quantifier parser
This commit is contained in:
parent
df6b9e99f4
commit
b457d39101
2
Makefile
2
Makefile
@ -14,4 +14,4 @@ check: build
|
||||
fmt: $(SOURCES)
|
||||
@gofmt -w -s $(SOURCES)
|
||||
|
||||
precommit: build check fmt
|
||||
precommit: fmt build check
|
||||
|
48
boot.go
48
boot.go
@ -4,6 +4,7 @@ import (
|
||||
"errors"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var errInvalidDefinition = errors.New("invalid syntax definition")
|
||||
@ -32,11 +33,6 @@ func checkBootDefinitionLength(d []string) error {
|
||||
return errInvalidDefinition
|
||||
}
|
||||
|
||||
case "quantifier":
|
||||
if len(d) != 6 {
|
||||
return errInvalidDefinition
|
||||
}
|
||||
|
||||
case "sequence", "choice":
|
||||
if len(d) < 4 {
|
||||
return errInvalidDefinition
|
||||
@ -121,28 +117,42 @@ func defineBootCharSequence(s *Syntax, d []string) error {
|
||||
return s.CharSequence(d[1], ct, chars)
|
||||
}
|
||||
|
||||
func defineBootQuantifier(s *Syntax, d []string) error {
|
||||
ct := stringToCommitType(d[2])
|
||||
func namesToSequenceItemsQuantify(n []string, quantify bool) []SequenceItem {
|
||||
si := make([]SequenceItem, len(n))
|
||||
for i, ni := range n {
|
||||
var min, max int
|
||||
if quantify {
|
||||
nis := strings.Split(ni, ":")
|
||||
if len(nis) == 3 {
|
||||
ni = nis[0]
|
||||
|
||||
var (
|
||||
min, max int
|
||||
err error
|
||||
)
|
||||
var err error
|
||||
|
||||
if min, err = strconv.Atoi(d[4]); err != nil {
|
||||
return err
|
||||
min, err = strconv.Atoi(nis[1])
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
max, err = strconv.Atoi(nis[2])
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
si[i] = SequenceItem{Name: ni, Min: min, Max: max}
|
||||
}
|
||||
|
||||
if max, err = strconv.Atoi(d[5]); err != nil {
|
||||
return err
|
||||
}
|
||||
return si
|
||||
}
|
||||
|
||||
return s.Quantifier(d[1], ct, d[3], min, max)
|
||||
func namesToSequenceItems(n []string) []SequenceItem {
|
||||
return namesToSequenceItemsQuantify(n, false)
|
||||
}
|
||||
|
||||
func defineBootSequence(s *Syntax, d []string) error {
|
||||
ct := stringToCommitType(d[2])
|
||||
return s.Sequence(d[1], ct, d[3:]...)
|
||||
return s.Sequence(d[1], ct, namesToSequenceItemsQuantify(d[3:], true)...)
|
||||
}
|
||||
|
||||
func defineBootChoice(s *Syntax, d []string) error {
|
||||
@ -158,8 +168,6 @@ func defineBoot(s *Syntax, d []string) error {
|
||||
return defineBootClass(s, d)
|
||||
case "chars":
|
||||
return defineBootCharSequence(s, d)
|
||||
case "quantifier":
|
||||
return defineBootQuantifier(s, d)
|
||||
case "sequence":
|
||||
return defineBootSequence(s, d)
|
||||
case "choice":
|
||||
|
@ -7,7 +7,6 @@ import (
|
||||
|
||||
func TestBoot(t *testing.T) {
|
||||
var trace Trace
|
||||
// trace = NewTrace(2)
|
||||
|
||||
b, err := initBoot(trace, bootDefinitions)
|
||||
if err != nil {
|
||||
@ -29,9 +28,11 @@ func TestBoot(t *testing.T) {
|
||||
return
|
||||
}
|
||||
|
||||
// trace = NewTrace(1)
|
||||
s0 := NewSyntax(trace)
|
||||
if err := define(s0, n0); err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
_, err = f.Seek(0, 0)
|
||||
@ -40,6 +41,12 @@ func TestBoot(t *testing.T) {
|
||||
return
|
||||
}
|
||||
|
||||
err = s0.Init()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
n1, err := s0.Parse(f)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
|
@ -44,7 +44,7 @@ var bootDefinitions = [][]string{{
|
||||
}, {
|
||||
"choice", "block-comment-char", "alias", "not-block-close", "not-star",
|
||||
}, {
|
||||
"quantifier", "block-comment-body", "alias", "block-comment-char", "0", "-1",
|
||||
"sequence", "block-comment-body", "alias", "block-comment-char:0:-1",
|
||||
}, {
|
||||
"sequence",
|
||||
"block-comment",
|
||||
@ -53,15 +53,15 @@ var bootDefinitions = [][]string{{
|
||||
"block-comment-body",
|
||||
"close-block-comment",
|
||||
}, {
|
||||
"quantifier", "not-nls", "alias", "not-nl", "0", "-1",
|
||||
"sequence", "not-nls", "alias", "not-nl:0:-1",
|
||||
}, {
|
||||
"sequence", "line-comment", "alias", "double-slash", "not-nls",
|
||||
}, {
|
||||
"choice", "comment-segment", "alias", "block-comment", "line-comment",
|
||||
}, {
|
||||
"quantifier", "wss", "alias", "ws", "0", "-1",
|
||||
"sequence", "wss", "alias", "ws:0:-1",
|
||||
}, {
|
||||
"quantifier", "optional-nl", "alias", "nl", "0", "1",
|
||||
"sequence", "optional-nl", "alias", "nl:0:1",
|
||||
}, {
|
||||
"choice",
|
||||
"ws-no-nl",
|
||||
@ -81,7 +81,7 @@ var bootDefinitions = [][]string{{
|
||||
"ws-no-nl",
|
||||
"comment-segment",
|
||||
}, {
|
||||
"quantifier", "continue-comment", "alias", "continue-comment-segment", "0", "-1",
|
||||
"sequence", "continue-comment", "alias", "continue-comment-segment:0:-1",
|
||||
}, {
|
||||
"sequence",
|
||||
"comment",
|
||||
@ -91,7 +91,7 @@ var bootDefinitions = [][]string{{
|
||||
}, {
|
||||
"choice", "wsc", "alias", "ws", "comment",
|
||||
}, {
|
||||
"quantifier", "wscs", "alias", "wsc", "0", "-1",
|
||||
"sequence", "wscs", "alias", "wsc:0:-1",
|
||||
}, {
|
||||
"anything", "anything", "alias",
|
||||
}, {
|
||||
@ -105,7 +105,7 @@ var bootDefinitions = [][]string{{
|
||||
}, {
|
||||
"chars", "dash", "alias", "-",
|
||||
}, {
|
||||
"quantifier", "optional-class-not", "alias", "class-not", "0", "1",
|
||||
"sequence", "optional-class-not", "alias", "class-not:0:1",
|
||||
}, {
|
||||
"class", "not-class-control", "alias", "^\\\\\\[\\]\\^\\-",
|
||||
}, {
|
||||
@ -119,7 +119,7 @@ var bootDefinitions = [][]string{{
|
||||
}, {
|
||||
"choice", "char-or-range", "alias", "class-char", "char-range",
|
||||
}, {
|
||||
"quantifier", "chars-or-ranges", "alias", "char-or-range", "0", "-1",
|
||||
"sequence", "chars-or-ranges", "alias", "char-or-range:0:-1",
|
||||
}, {
|
||||
"sequence", "char-class", "none", "open-square", "optional-class-not", "chars-or-ranges", "close-square",
|
||||
}, {
|
||||
@ -129,7 +129,7 @@ var bootDefinitions = [][]string{{
|
||||
}, {
|
||||
"choice", "sequence-char", "none", "not-char-sequence-control", "escaped-char",
|
||||
}, {
|
||||
"quantifier", "char-sequence-chars", "alias", "sequence-char", "0", "-1",
|
||||
"sequence", "char-sequence-chars", "alias", "sequence-char:0:-1",
|
||||
}, {
|
||||
"sequence", "char-sequence", "none", "double-quote", "char-sequence-chars", "double-quote",
|
||||
}, {
|
||||
@ -137,7 +137,7 @@ var bootDefinitions = [][]string{{
|
||||
}, {
|
||||
"class", "symbol-char", "alias", "^\\\\ \\n\\t\\b\\f\\r\\v\\b/.\\[\\]\\\"{}\\^+*?|():=;",
|
||||
}, {
|
||||
"quantifier", "symbol-chars", "alias", "symbol-char", "1", "-1",
|
||||
"sequence", "symbol-chars", "alias", "symbol-char:1:-1",
|
||||
}, {
|
||||
"sequence", "symbol", "none", "symbol-chars",
|
||||
}, {
|
||||
@ -153,7 +153,7 @@ var bootDefinitions = [][]string{{
|
||||
}, {
|
||||
"class", "digit", "alias", "0-9",
|
||||
}, {
|
||||
"quantifier", "number", "alias", "digit", "1", "-1",
|
||||
"sequence", "number", "alias", "digit:1:-1",
|
||||
}, {
|
||||
"sequence", "count", "none", "number",
|
||||
}, {
|
||||
@ -194,23 +194,23 @@ var bootDefinitions = [][]string{{
|
||||
}, {
|
||||
"choice", "quantifiable", "alias", "terminal", "symbol", "group",
|
||||
}, {
|
||||
"sequence", "quantifier", "none", "quantifiable", "wscs", "quantity",
|
||||
"choice", "item-choice", "alias", "terminal", "symbol", "group",
|
||||
}, {
|
||||
"choice", "item", "alias", "terminal", "symbol", "group", "quantifier",
|
||||
"sequence", "item", "none", "item-choice", "quantity:0:1",
|
||||
}, {
|
||||
"sequence", "item-continue", "alias", "wscs", "item",
|
||||
}, {
|
||||
"quantifier", "items-continue", "alias", "item-continue", "0", "-1",
|
||||
"sequence", "items-continue", "alias", "item-continue:0:-1",
|
||||
}, {
|
||||
"sequence", "sequence", "none", "item", "items-continue",
|
||||
}, {
|
||||
"choice", "element", "alias", "terminal", "symbol", "group", "quantifier", "sequence",
|
||||
"choice", "element", "alias", "terminal", "symbol", "group", "sequence",
|
||||
}, {
|
||||
"chars", "pipe", "alias", "|",
|
||||
}, {
|
||||
"sequence", "element-continue", "alias", "wscs", "pipe", "wscs", "element",
|
||||
}, {
|
||||
"quantifier", "elements-continue", "alias", "element-continue", "1", "-1",
|
||||
"sequence", "elements-continue", "alias", "element-continue:1:-1",
|
||||
}, {
|
||||
"sequence", "choice", "none", "element", "elements-continue",
|
||||
}, {
|
||||
@ -220,7 +220,6 @@ var bootDefinitions = [][]string{{
|
||||
"terminal",
|
||||
"symbol",
|
||||
"group",
|
||||
"quantifier",
|
||||
"sequence",
|
||||
"choice",
|
||||
}, {
|
||||
@ -236,7 +235,7 @@ var bootDefinitions = [][]string{{
|
||||
}, {
|
||||
"sequence", "flag-tag", "alias", "colon", "flag",
|
||||
}, {
|
||||
"quantifier", "flags", "alias", "flag-tag", "0", "-1",
|
||||
"sequence", "flags", "alias", "flag-tag:0:-1",
|
||||
}, {
|
||||
"chars", "equal", "alias", "=",
|
||||
}, {
|
||||
@ -246,7 +245,7 @@ var bootDefinitions = [][]string{{
|
||||
}, {
|
||||
"choice", "wsc-or-semicolon", "alias", "wsc", "semicolon",
|
||||
}, {
|
||||
"quantifier", "wsc-or-semicolons", "alias", "wsc-or-semicolon", "0", "-1",
|
||||
"sequence", "wsc-or-semicolons", "alias", "wsc-or-semicolon:0:-1",
|
||||
}, {
|
||||
"sequence",
|
||||
"subsequent-definition",
|
||||
@ -256,12 +255,10 @@ var bootDefinitions = [][]string{{
|
||||
"wsc-or-semicolons",
|
||||
"definition",
|
||||
}, {
|
||||
"quantifier",
|
||||
"sequence",
|
||||
"subsequent-definitions",
|
||||
"alias",
|
||||
"subsequent-definition",
|
||||
"0",
|
||||
"-1",
|
||||
"subsequent-definition:0:-1",
|
||||
}, {
|
||||
"sequence",
|
||||
"definitions",
|
||||
@ -269,12 +266,10 @@ var bootDefinitions = [][]string{{
|
||||
"definition",
|
||||
"subsequent-definitions",
|
||||
}, {
|
||||
"quantifier",
|
||||
"sequence",
|
||||
"opt-definitions",
|
||||
"alias",
|
||||
"definitions",
|
||||
"0",
|
||||
"1",
|
||||
"definitions:0:1",
|
||||
}, {
|
||||
"sequence",
|
||||
"syntax",
|
||||
|
28
char.go
28
char.go
@ -1,9 +1,10 @@
|
||||
package parse
|
||||
|
||||
// TODO: rename to token
|
||||
|
||||
type charParser struct {
|
||||
name string
|
||||
commit CommitType
|
||||
any bool
|
||||
not bool
|
||||
chars []rune
|
||||
ranges [][]rune
|
||||
@ -13,14 +14,13 @@ type charParser struct {
|
||||
func newChar(
|
||||
name string,
|
||||
ct CommitType,
|
||||
any, not bool,
|
||||
not bool,
|
||||
chars []rune,
|
||||
ranges [][]rune,
|
||||
) *charParser {
|
||||
return &charParser{
|
||||
name: name,
|
||||
commit: ct,
|
||||
any: any,
|
||||
not: not,
|
||||
chars: chars,
|
||||
ranges: ranges,
|
||||
@ -31,7 +31,11 @@ func (p *charParser) nodeName() string { return p.name }
|
||||
|
||||
func (p *charParser) parser(r *registry, path []string) (parser, error) {
|
||||
if stringsContain(path, p.name) {
|
||||
panic(errCannotIncludeParsers)
|
||||
panic(cannotIncludeParsers(p.name))
|
||||
}
|
||||
|
||||
if _, ok := r.parser(p.name); ok {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
r.setParser(p)
|
||||
@ -42,23 +46,19 @@ func (p *charParser) commitType() CommitType {
|
||||
return p.commit
|
||||
}
|
||||
|
||||
func (p *charParser) setIncludedBy(i parser, path []string) {
|
||||
func (p *charParser) setIncludedBy(including parser, path []string) {
|
||||
if stringsContain(path, p.name) {
|
||||
panic(errCannotIncludeParsers)
|
||||
panic(cannotIncludeParsers(p.name))
|
||||
}
|
||||
|
||||
p.includedBy = append(p.includedBy, i)
|
||||
p.includedBy = append(p.includedBy, including)
|
||||
}
|
||||
|
||||
func (p *charParser) cacheIncluded(*context, *Node) {
|
||||
panic(errCannotIncludeParsers)
|
||||
panic(cannotIncludeParsers(p.name))
|
||||
}
|
||||
|
||||
func (p *charParser) match(t rune) bool {
|
||||
if p.any {
|
||||
return true
|
||||
}
|
||||
|
||||
for _, ci := range p.chars {
|
||||
if ci == t {
|
||||
return !p.not
|
||||
@ -93,8 +93,8 @@ func (p *charParser) parse(t Trace, c *context) {
|
||||
t.Out1("success", string(tok))
|
||||
n := newNode(p.name, p.commit, c.offset, c.offset+1)
|
||||
c.cache.set(c.offset, p.name, n)
|
||||
for _, i := range p.includedBy {
|
||||
i.cacheIncluded(c, n)
|
||||
for _, including := range p.includedBy {
|
||||
including.cacheIncluded(c, n)
|
||||
}
|
||||
|
||||
c.success(n)
|
||||
|
92
define.go
92
define.go
@ -106,19 +106,25 @@ func nodeChar(n *Node) rune {
|
||||
return toRune(s)
|
||||
}
|
||||
|
||||
func defineMember(s *Syntax, defaultName string, n *Node) (string, error) {
|
||||
switch n.Name {
|
||||
case "symbol":
|
||||
return n.Text(), nil
|
||||
default:
|
||||
return defaultName, defineExpression(s, defaultName, Alias, n)
|
||||
}
|
||||
}
|
||||
|
||||
func defineMembers(s *Syntax, name string, n ...*Node) ([]string, error) {
|
||||
var refs []string
|
||||
for i, ni := range n {
|
||||
nmi := childName(name, i)
|
||||
switch ni.Name {
|
||||
case "symbol":
|
||||
refs = append(refs, ni.Text())
|
||||
default:
|
||||
refs = append(refs, nmi)
|
||||
if err := defineExpression(s, nmi, Alias, ni); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ref, err := defineMember(s, nmi, ni)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
refs = append(refs, ref)
|
||||
}
|
||||
|
||||
return refs, nil
|
||||
@ -156,38 +162,33 @@ func defineCharSequence(s *Syntax, name string, ct CommitType, charNodes []*Node
|
||||
return s.CharSequence(name, ct, chars)
|
||||
}
|
||||
|
||||
func defineQuantifier(s *Syntax, name string, ct CommitType, n *Node, q *Node) error {
|
||||
refs, err := defineMembers(s, name, n)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var min, max int
|
||||
switch q.Name {
|
||||
func getQuantity(n *Node) (min int, max int, err error) {
|
||||
switch n.Name {
|
||||
case "count-quantifier":
|
||||
min, err = strconv.Atoi(q.Nodes[0].Text())
|
||||
min, err = strconv.Atoi(n.Nodes[0].Text())
|
||||
if err != nil {
|
||||
return err
|
||||
return
|
||||
}
|
||||
|
||||
max = min
|
||||
case "range-quantifier":
|
||||
min = 0
|
||||
max = -1
|
||||
for _, rq := range q.Nodes {
|
||||
for _, rq := range n.Nodes {
|
||||
switch rq.Name {
|
||||
case "range-from":
|
||||
min, err = strconv.Atoi(rq.Text())
|
||||
if err != nil {
|
||||
return err
|
||||
return
|
||||
}
|
||||
case "range-to":
|
||||
max, err = strconv.Atoi(rq.Text())
|
||||
if err != nil {
|
||||
return err
|
||||
return
|
||||
}
|
||||
default:
|
||||
return ErrInvalidSyntax
|
||||
err = ErrInvalidSyntax
|
||||
return
|
||||
}
|
||||
}
|
||||
case "one-or-more":
|
||||
@ -198,23 +199,42 @@ func defineQuantifier(s *Syntax, name string, ct CommitType, n *Node, q *Node) e
|
||||
min, max = 0, 1
|
||||
}
|
||||
|
||||
return s.Quantifier(name, ct, refs[0], min, max)
|
||||
return
|
||||
}
|
||||
|
||||
func defineSymbol(s *Syntax, name string, ct CommitType, n *Node) error {
|
||||
return s.Sequence(name, ct, SequenceItem{Name: n.Text()})
|
||||
}
|
||||
|
||||
func defineSequence(s *Syntax, name string, ct CommitType, n ...*Node) error {
|
||||
refs, err := defineMembers(s, name, n...)
|
||||
if err != nil {
|
||||
return err
|
||||
var items []SequenceItem
|
||||
for i, ni := range n {
|
||||
if ni.Name != "item" || len(ni.Nodes) == 0 {
|
||||
return ErrInvalidSyntax
|
||||
}
|
||||
|
||||
var (
|
||||
item SequenceItem
|
||||
err error
|
||||
)
|
||||
|
||||
defaultName := childName(name, i)
|
||||
item.Name, err = defineMember(s, defaultName, ni.Nodes[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(ni.Nodes) == 2 {
|
||||
item.Min, item.Max, err = getQuantity(ni.Nodes[1])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
items = append(items, item)
|
||||
}
|
||||
|
||||
// // TODO: try to make this expressed in the syntax (maybe as sequences need either a quantifier or not
|
||||
// // one item? or by maintaining the excluded and caching in the sequence in a similar way when there is
|
||||
// // only one item?) how does this affect the quantifiers?
|
||||
// if len(refs) == 1 {
|
||||
// return s.Choice(name, ct, refs[0])
|
||||
// }
|
||||
|
||||
return s.Sequence(name, ct, refs...)
|
||||
return s.Sequence(name, ct, items...)
|
||||
}
|
||||
|
||||
func defineChoice(s *Syntax, name string, ct CommitType, n ...*Node) error {
|
||||
@ -236,9 +256,7 @@ func defineExpression(s *Syntax, name string, ct CommitType, expression *Node) e
|
||||
case "char-sequence":
|
||||
err = defineCharSequence(s, name, ct, expression.Nodes)
|
||||
case "symbol":
|
||||
err = defineSequence(s, name, ct, expression)
|
||||
case "quantifier":
|
||||
err = defineQuantifier(s, name, ct, expression.Nodes[0], expression.Nodes[1])
|
||||
err = defineSymbol(s, name, ct, expression)
|
||||
case "sequence":
|
||||
err = defineSequence(s, name, ct, expression.Nodes...)
|
||||
case "choice":
|
||||
|
11
parse.go
11
parse.go
@ -1,9 +1,6 @@
|
||||
package parse
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
import "fmt"
|
||||
|
||||
type definition interface {
|
||||
nodeName() string
|
||||
@ -18,12 +15,14 @@ type parser interface {
|
||||
parse(Trace, *context)
|
||||
}
|
||||
|
||||
var errCannotIncludeParsers = errors.New("cannot include parsers")
|
||||
|
||||
func parserNotFound(name string) error {
|
||||
return fmt.Errorf("parser not found: %s", name)
|
||||
}
|
||||
|
||||
func cannotIncludeParsers(name string) error {
|
||||
return fmt.Errorf("parser: %s cannot include other parsers", name)
|
||||
}
|
||||
|
||||
func stringsContain(ss []string, s string) bool {
|
||||
for _, si := range ss {
|
||||
if si == s {
|
||||
|
@ -358,9 +358,10 @@ func TestSequence(t *testing.T) {
|
||||
}},
|
||||
)
|
||||
|
||||
testString(
|
||||
testStringTrace(
|
||||
t,
|
||||
`A = "a" | (A?)*`,
|
||||
1,
|
||||
[]testItem{{
|
||||
msg: "sequence in choice with redundant quantifier",
|
||||
text: "aaa",
|
||||
@ -397,15 +398,15 @@ func TestQuantifiers(t *testing.T) {
|
||||
t,
|
||||
`A = "a" "b"{0} "a"`,
|
||||
[]testItem{{
|
||||
msg: "zero",
|
||||
text: "aa",
|
||||
msg: "zero, considered as one",
|
||||
text: "aba",
|
||||
node: &Node{
|
||||
Name: "A",
|
||||
to: 2,
|
||||
to: 3,
|
||||
},
|
||||
}, {
|
||||
msg: "zero, fail",
|
||||
text: "aba",
|
||||
text: "aa",
|
||||
fail: true,
|
||||
}},
|
||||
)
|
||||
@ -642,10 +643,9 @@ func TestQuantifiers(t *testing.T) {
|
||||
}},
|
||||
)
|
||||
|
||||
testStringTrace(
|
||||
testString(
|
||||
t,
|
||||
`A = "a" "b"{0,} "a"`,
|
||||
1,
|
||||
[]testItem{{
|
||||
msg: "zero or more, explicit, missing",
|
||||
text: "aa",
|
||||
@ -663,10 +663,9 @@ func TestQuantifiers(t *testing.T) {
|
||||
}},
|
||||
)
|
||||
|
||||
testStringTrace(
|
||||
testString(
|
||||
t,
|
||||
`A = "a" "b"* "a"`,
|
||||
1,
|
||||
[]testItem{{
|
||||
msg: "zero or more, shortcut, missing",
|
||||
text: "aa",
|
||||
@ -684,10 +683,9 @@ func TestQuantifiers(t *testing.T) {
|
||||
}},
|
||||
)
|
||||
|
||||
testStringTrace(
|
||||
testString(
|
||||
t,
|
||||
`A = "a" "b"{1,} "a"`,
|
||||
1,
|
||||
[]testItem{{
|
||||
msg: "one or more, explicit, missing",
|
||||
text: "aa",
|
||||
@ -702,10 +700,9 @@ func TestQuantifiers(t *testing.T) {
|
||||
}},
|
||||
)
|
||||
|
||||
testStringTrace(
|
||||
testString(
|
||||
t,
|
||||
`A = "a" "b"+ "a"`,
|
||||
1,
|
||||
[]testItem{{
|
||||
msg: "one or more, shortcut, missing",
|
||||
text: "aa",
|
||||
@ -720,10 +717,9 @@ func TestQuantifiers(t *testing.T) {
|
||||
}},
|
||||
)
|
||||
|
||||
testStringTrace(
|
||||
testString(
|
||||
t,
|
||||
`A = "a" "b"{3,} "a"`,
|
||||
1,
|
||||
[]testItem{{
|
||||
msg: "three or more, explicit, missing",
|
||||
text: "abba",
|
||||
|
172
quantifier.go
172
quantifier.go
@ -1,172 +0,0 @@
|
||||
package parse
|
||||
|
||||
type quantifierDefinition struct {
|
||||
name string
|
||||
commit CommitType
|
||||
min, max int
|
||||
item string
|
||||
}
|
||||
|
||||
type quantifierParser struct {
|
||||
name string
|
||||
commit CommitType
|
||||
min, max int
|
||||
item parser
|
||||
includedBy []parser
|
||||
}
|
||||
|
||||
func newQuantifier(name string, ct CommitType, item string, min, max int) *quantifierDefinition {
|
||||
return &quantifierDefinition{
|
||||
name: name,
|
||||
commit: ct,
|
||||
min: min,
|
||||
max: max,
|
||||
item: item,
|
||||
}
|
||||
}
|
||||
|
||||
func (d *quantifierDefinition) nodeName() string { return d.name }
|
||||
|
||||
func (d *quantifierDefinition) parser(r *registry, path []string) (parser, error) {
|
||||
if stringsContain(path, d.name) {
|
||||
panic(errCannotIncludeParsers)
|
||||
}
|
||||
|
||||
p, ok := r.parser(d.name)
|
||||
if ok {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
qp := &quantifierParser{
|
||||
name: d.name,
|
||||
commit: d.commit,
|
||||
min: d.min,
|
||||
max: d.max,
|
||||
}
|
||||
|
||||
r.setParser(qp)
|
||||
|
||||
item, ok := r.parser(d.item)
|
||||
if !ok {
|
||||
itemDefinition, ok := r.definition(d.item)
|
||||
if !ok {
|
||||
return nil, parserNotFound(d.item)
|
||||
}
|
||||
|
||||
var err error
|
||||
item, err = itemDefinition.parser(r, path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
qp.item = item
|
||||
return qp, nil
|
||||
}
|
||||
|
||||
func (d *quantifierDefinition) commitType() CommitType { return d.commit }
|
||||
func (p *quantifierParser) nodeName() string { return p.name }
|
||||
|
||||
// TODO: merge the quantifier into the sequence
|
||||
// DOC: sequences are hungry and are not revisited, a*a cannot match anything.
|
||||
// DOC: how to match a trailing a? (..)*a | .(..)*a
|
||||
|
||||
func (p *quantifierParser) setIncludedBy(i parser, path []string) {
|
||||
if stringsContain(path, p.name) {
|
||||
panic(errCannotIncludeParsers)
|
||||
}
|
||||
|
||||
p.includedBy = append(p.includedBy, i)
|
||||
}
|
||||
|
||||
func (p *quantifierParser) cacheIncluded(*context, *Node) {
|
||||
panic(errCannotIncludeParsers)
|
||||
}
|
||||
|
||||
func (p *quantifierParser) parse(t Trace, c *context) {
|
||||
t = t.Extend(p.name)
|
||||
t.Out1("parsing quantifier", c.offset)
|
||||
|
||||
if p.commit&Documentation != 0 {
|
||||
t.Out1("fail, doc")
|
||||
c.fail(c.offset)
|
||||
return
|
||||
}
|
||||
|
||||
if c.excluded(c.offset, p.name) {
|
||||
t.Out1("excluded")
|
||||
c.fail(c.offset)
|
||||
return
|
||||
}
|
||||
|
||||
c.exclude(c.offset, p.name)
|
||||
defer c.include(c.offset, p.name)
|
||||
|
||||
node := newNode(p.name, p.commit, c.offset, c.offset)
|
||||
|
||||
// this way of checking the cache definitely needs the testing of the russ cox form
|
||||
for {
|
||||
if p.max >= 0 && node.nodeLength() == p.max {
|
||||
t.Out1("success, max reached")
|
||||
c.cache.set(node.from, p.name, node)
|
||||
for _, i := range p.includedBy {
|
||||
i.cacheIncluded(c, node)
|
||||
}
|
||||
|
||||
c.success(node)
|
||||
return
|
||||
}
|
||||
|
||||
t.Out2("next quantifier item")
|
||||
|
||||
// n, m, ok := c.cache.get(c.offset, p.item.nodeName())
|
||||
m, ok := c.fromCache(p.item.nodeName())
|
||||
if ok {
|
||||
t.Out1("quantifier item found in cache, match:", m, c.offset, c.node.tokenLength())
|
||||
if m {
|
||||
node.append(c.node)
|
||||
if c.node.tokenLength() > 0 {
|
||||
t.Out2("taking next after cached found")
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if node.nodeLength() >= p.min {
|
||||
t.Out1("success, no more match")
|
||||
c.cache.set(node.from, p.name, node)
|
||||
for _, i := range p.includedBy {
|
||||
i.cacheIncluded(c, node)
|
||||
}
|
||||
|
||||
c.success(node)
|
||||
} else {
|
||||
t.Out1("fail, min not reached")
|
||||
c.cache.set(node.from, p.name, nil)
|
||||
c.fail(node.from)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
p.item.parse(t, c)
|
||||
if !c.match || c.node.tokenLength() == 0 {
|
||||
if node.nodeLength() >= p.min {
|
||||
t.Out1("success, no more match")
|
||||
c.cache.set(node.from, p.name, node)
|
||||
for _, i := range p.includedBy {
|
||||
i.cacheIncluded(c, node)
|
||||
}
|
||||
|
||||
c.success(node)
|
||||
} else {
|
||||
t.Out1("fail, min not reached")
|
||||
c.cache.set(node.from, p.name, nil)
|
||||
c.fail(node.from)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
node.append(c.node)
|
||||
}
|
||||
}
|
103
sequence.go
103
sequence.go
@ -3,17 +3,18 @@ package parse
|
||||
type sequenceDefinition struct {
|
||||
name string
|
||||
commit CommitType
|
||||
items []string
|
||||
items []SequenceItem
|
||||
}
|
||||
|
||||
type sequenceParser struct {
|
||||
name string
|
||||
commit CommitType
|
||||
items []parser
|
||||
ranges [][]int
|
||||
including []parser
|
||||
}
|
||||
|
||||
func newSequence(name string, ct CommitType, items []string) *sequenceDefinition {
|
||||
func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefinition {
|
||||
return &sequenceDefinition{
|
||||
name: name,
|
||||
commit: ct,
|
||||
@ -25,7 +26,7 @@ func (d *sequenceDefinition) nodeName() string { return d.name }
|
||||
|
||||
func (d *sequenceDefinition) parser(r *registry, path []string) (parser, error) {
|
||||
if stringsContain(path, d.name) {
|
||||
panic(errCannotIncludeParsers)
|
||||
panic(cannotIncludeParsers(d.name))
|
||||
}
|
||||
|
||||
p, ok := r.parser(d.name)
|
||||
@ -40,34 +41,47 @@ func (d *sequenceDefinition) parser(r *registry, path []string) (parser, error)
|
||||
|
||||
r.setParser(sp)
|
||||
|
||||
var items []parser
|
||||
var (
|
||||
items []parser
|
||||
ranges [][]int
|
||||
)
|
||||
|
||||
path = append(path, d.name)
|
||||
for _, name := range d.items {
|
||||
item, ok := r.parser(name)
|
||||
for _, item := range d.items {
|
||||
if item.Min == 0 && item.Max == 0 {
|
||||
item.Min, item.Max = 1, 1
|
||||
} else if item.Max == 0 {
|
||||
item.Max = -1
|
||||
}
|
||||
|
||||
pi, ok := r.parser(item.Name)
|
||||
if ok {
|
||||
items = append(items, item)
|
||||
items = append(items, pi)
|
||||
ranges = append(ranges, []int{item.Min, item.Max})
|
||||
continue
|
||||
}
|
||||
|
||||
itemDefinition, ok := r.definition(name)
|
||||
itemDefinition, ok := r.definition(item.Name)
|
||||
if !ok {
|
||||
return nil, parserNotFound(name)
|
||||
return nil, parserNotFound(item.Name)
|
||||
}
|
||||
|
||||
item, err := itemDefinition.parser(r, path)
|
||||
pi, err := itemDefinition.parser(r, path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
items = append(items, item)
|
||||
items = append(items, pi)
|
||||
ranges = append(ranges, []int{item.Min, item.Max})
|
||||
}
|
||||
|
||||
// for single items, acts like a choice
|
||||
if len(items) == 1 {
|
||||
if len(items) == 1 && ranges[0][0] == 1 && ranges[0][1] == 1 {
|
||||
items[0].setIncludedBy(sp, path)
|
||||
}
|
||||
|
||||
sp.items = items
|
||||
sp.ranges = ranges
|
||||
return sp, nil
|
||||
}
|
||||
|
||||
@ -112,6 +126,8 @@ e = b | d
|
||||
input: 111
|
||||
*/
|
||||
|
||||
// TODO: apply the quantifier migration to the syntax
|
||||
|
||||
func (p *sequenceParser) parse(t Trace, c *context) {
|
||||
t = t.Extend(p.name)
|
||||
t.Out1("parsing sequence", c.offset)
|
||||
@ -122,8 +138,6 @@ func (p *sequenceParser) parse(t Trace, c *context) {
|
||||
return
|
||||
}
|
||||
|
||||
// TODO: maybe we can check the cache here? no because that would exclude the continuations
|
||||
|
||||
if c.excluded(c.offset, p.name) {
|
||||
t.Out1("excluded")
|
||||
c.fail(c.offset)
|
||||
@ -134,54 +148,75 @@ func (p *sequenceParser) parse(t Trace, c *context) {
|
||||
defer c.include(c.offset, p.name)
|
||||
|
||||
items := p.items
|
||||
ranges := p.ranges
|
||||
var currentCount int
|
||||
node := newNode(p.name, p.commit, c.offset, c.offset)
|
||||
|
||||
for len(items) > 0 {
|
||||
t.Out2("next sequence item")
|
||||
// n, m, ok := c.cache.get(c.offset, items[0].nodeName())
|
||||
m, ok := c.fromCache(items[0].nodeName())
|
||||
if ok {
|
||||
t.Out1("sequence item found in cache, match:", m, items[0].nodeName(), c.offset)
|
||||
if m {
|
||||
t.Out2("sequence item from cache:", c.node.Name, len(c.node.Nodes), c.node.from)
|
||||
node.append(c.node)
|
||||
items = items[1:]
|
||||
if c.node.tokenLength() > 0 {
|
||||
node.append(c.node)
|
||||
currentCount++
|
||||
}
|
||||
|
||||
if c.node.tokenLength() == 0 || ranges[0][1] >= 0 && currentCount == ranges[0][1] {
|
||||
items = items[1:]
|
||||
ranges = ranges[1:]
|
||||
currentCount = 0
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
c.cache.set(node.from, p.name, nil)
|
||||
c.fail(node.from)
|
||||
return
|
||||
if currentCount < ranges[0][0] {
|
||||
c.cache.set(node.from, p.name, nil)
|
||||
c.fail(node.from)
|
||||
return
|
||||
}
|
||||
|
||||
items = items[1:]
|
||||
ranges = ranges[1:]
|
||||
currentCount = 0
|
||||
continue
|
||||
}
|
||||
|
||||
items[0].parse(t, c)
|
||||
items = items[1:]
|
||||
|
||||
if !c.match {
|
||||
t.Out1("fail, item failed")
|
||||
c.cache.set(node.from, p.name, nil)
|
||||
c.fail(node.from)
|
||||
return
|
||||
if currentCount < ranges[0][0] {
|
||||
t.Out1("fail, item failed")
|
||||
c.cache.set(node.from, p.name, nil)
|
||||
c.fail(node.from)
|
||||
return
|
||||
}
|
||||
|
||||
items = items[1:]
|
||||
ranges = ranges[1:]
|
||||
currentCount = 0
|
||||
continue
|
||||
}
|
||||
|
||||
if c.node.tokenLength() > 0 {
|
||||
t.Out2("appending sequence item", c.node.Name, len(c.node.Nodes))
|
||||
node.append(c.node)
|
||||
currentCount++
|
||||
}
|
||||
|
||||
if c.node.tokenLength() == 0 || ranges[0][1] >= 0 && currentCount == ranges[0][1] {
|
||||
items = items[1:]
|
||||
ranges = ranges[1:]
|
||||
currentCount = 0
|
||||
}
|
||||
}
|
||||
|
||||
t.Out1("success, items parsed")
|
||||
t.Out2("nodes", node.nodeLength())
|
||||
if node.Name == "group" {
|
||||
t.Out2("caching group", node.from, node.Nodes[2].Name, node.Nodes[2].nodeLength())
|
||||
}
|
||||
|
||||
// is this cached item ever taken?
|
||||
c.cache.set(node.from, p.name, node)
|
||||
for _, i := range p.including {
|
||||
i.cacheIncluded(c, node)
|
||||
}
|
||||
|
||||
t.Out2("caching sequence and included by done")
|
||||
c.success(node)
|
||||
}
|
||||
|
19
syntax.go
19
syntax.go
@ -16,6 +16,11 @@ const (
|
||||
Root
|
||||
)
|
||||
|
||||
type SequenceItem struct {
|
||||
Name string
|
||||
Min, Max int // 0,0 considered as 1,1, x,0 considered as x,-1
|
||||
}
|
||||
|
||||
type Syntax struct {
|
||||
trace Trace
|
||||
registry *registry
|
||||
@ -68,11 +73,11 @@ func (s *Syntax) register(d definition) error {
|
||||
}
|
||||
|
||||
func (s *Syntax) AnyChar(name string, ct CommitType) error {
|
||||
return s.register(newChar(name, ct, true, false, nil, nil))
|
||||
return s.Class(name, ct, true, nil, nil)
|
||||
}
|
||||
|
||||
func (s *Syntax) Class(name string, ct CommitType, not bool, chars []rune, ranges [][]rune) error {
|
||||
return s.register(newChar(name, ct, false, not, chars, ranges))
|
||||
return s.register(newChar(name, ct, not, chars, ranges))
|
||||
}
|
||||
|
||||
func childName(name string, childIndex int) string {
|
||||
@ -84,19 +89,15 @@ func (s *Syntax) CharSequence(name string, ct CommitType, chars []rune) error {
|
||||
for i, ci := range chars {
|
||||
ref := childName(name, i)
|
||||
refs = append(refs, ref)
|
||||
if err := s.register(newChar(ref, Alias, false, false, []rune{ci}, nil)); err != nil {
|
||||
if err := s.register(newChar(ref, Alias, false, []rune{ci}, nil)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return s.Sequence(name, ct, refs...)
|
||||
return s.Sequence(name, ct, namesToSequenceItems(refs)...)
|
||||
}
|
||||
|
||||
func (s *Syntax) Quantifier(name string, ct CommitType, item string, min, max int) error {
|
||||
return s.register(newQuantifier(name, ct, item, min, max))
|
||||
}
|
||||
|
||||
func (s *Syntax) Sequence(name string, ct CommitType, items ...string) error {
|
||||
func (s *Syntax) Sequence(name string, ct CommitType, items ...SequenceItem) error {
|
||||
return s.register(newSequence(name, ct, items))
|
||||
}
|
||||
|
||||
|
11
syntax.p
11
syntax.p
@ -49,22 +49,19 @@ quantity:alias = count-quantifier
|
||||
| zero-or-more
|
||||
| zero-or-one;
|
||||
|
||||
quantifier = (terminal | symbol | group) wsc* quantity;
|
||||
item = (terminal | symbol | group) quantity?;
|
||||
sequence = item (wsc* item)*; // TODO: why was this '+'?
|
||||
|
||||
item:alias = terminal | symbol | group | quantifier;
|
||||
sequence = item (wsc* item)+;
|
||||
|
||||
element:alias = terminal | symbol | group | quantifier | sequence;
|
||||
element:alias = terminal | symbol | group | sequence;
|
||||
|
||||
// DOC: once cached, doesn't try again, even in a new context, therefore the order may matter
|
||||
choice = element (wsc* "|" wsc* element)+;
|
||||
choice = element (wsc* "|" wsc* element)+;
|
||||
|
||||
// DOC: not having 'not' needs some tricks sometimes
|
||||
|
||||
expression:alias = terminal
|
||||
| symbol
|
||||
| group
|
||||
| quantifier
|
||||
| sequence
|
||||
| choice;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user