diff --git a/Makefile b/Makefile index 0e1b440..e2fa897 100644 --- a/Makefile +++ b/Makefile @@ -14,4 +14,4 @@ check: build fmt: $(SOURCES) @gofmt -w -s $(SOURCES) -precommit: build check fmt +precommit: fmt build check diff --git a/boot.go b/boot.go index e68bc6b..9fd41df 100644 --- a/boot.go +++ b/boot.go @@ -4,6 +4,7 @@ import ( "errors" "os" "strconv" + "strings" ) var errInvalidDefinition = errors.New("invalid syntax definition") @@ -32,11 +33,6 @@ func checkBootDefinitionLength(d []string) error { return errInvalidDefinition } - case "quantifier": - if len(d) != 6 { - return errInvalidDefinition - } - case "sequence", "choice": if len(d) < 4 { return errInvalidDefinition @@ -121,28 +117,42 @@ func defineBootCharSequence(s *Syntax, d []string) error { return s.CharSequence(d[1], ct, chars) } -func defineBootQuantifier(s *Syntax, d []string) error { - ct := stringToCommitType(d[2]) +func namesToSequenceItemsQuantify(n []string, quantify bool) []SequenceItem { + si := make([]SequenceItem, len(n)) + for i, ni := range n { + var min, max int + if quantify { + nis := strings.Split(ni, ":") + if len(nis) == 3 { + ni = nis[0] - var ( - min, max int - err error - ) + var err error - if min, err = strconv.Atoi(d[4]); err != nil { - return err + min, err = strconv.Atoi(nis[1]) + if err != nil { + panic(err) + } + + max, err = strconv.Atoi(nis[2]) + if err != nil { + panic(err) + } + } + } + + si[i] = SequenceItem{Name: ni, Min: min, Max: max} } - if max, err = strconv.Atoi(d[5]); err != nil { - return err - } + return si +} - return s.Quantifier(d[1], ct, d[3], min, max) +func namesToSequenceItems(n []string) []SequenceItem { + return namesToSequenceItemsQuantify(n, false) } func defineBootSequence(s *Syntax, d []string) error { ct := stringToCommitType(d[2]) - return s.Sequence(d[1], ct, d[3:]...) + return s.Sequence(d[1], ct, namesToSequenceItemsQuantify(d[3:], true)...) } func defineBootChoice(s *Syntax, d []string) error { @@ -158,8 +168,6 @@ func defineBoot(s *Syntax, d []string) error { return defineBootClass(s, d) case "chars": return defineBootCharSequence(s, d) - case "quantifier": - return defineBootQuantifier(s, d) case "sequence": return defineBootSequence(s, d) case "choice": diff --git a/boot_test.go b/boot_test.go index 0e85f68..2922811 100644 --- a/boot_test.go +++ b/boot_test.go @@ -7,7 +7,6 @@ import ( func TestBoot(t *testing.T) { var trace Trace - // trace = NewTrace(2) b, err := initBoot(trace, bootDefinitions) if err != nil { @@ -29,9 +28,11 @@ func TestBoot(t *testing.T) { return } + // trace = NewTrace(1) s0 := NewSyntax(trace) if err := define(s0, n0); err != nil { t.Error(err) + return } _, err = f.Seek(0, 0) @@ -40,6 +41,12 @@ func TestBoot(t *testing.T) { return } + err = s0.Init() + if err != nil { + t.Error(err) + return + } + n1, err := s0.Parse(f) if err != nil { t.Error(err) diff --git a/bootsyntax.go b/bootsyntax.go index 2bcf746..76e41ba 100644 --- a/bootsyntax.go +++ b/bootsyntax.go @@ -44,7 +44,7 @@ var bootDefinitions = [][]string{{ }, { "choice", "block-comment-char", "alias", "not-block-close", "not-star", }, { - "quantifier", "block-comment-body", "alias", "block-comment-char", "0", "-1", + "sequence", "block-comment-body", "alias", "block-comment-char:0:-1", }, { "sequence", "block-comment", @@ -53,15 +53,15 @@ var bootDefinitions = [][]string{{ "block-comment-body", "close-block-comment", }, { - "quantifier", "not-nls", "alias", "not-nl", "0", "-1", + "sequence", "not-nls", "alias", "not-nl:0:-1", }, { "sequence", "line-comment", "alias", "double-slash", "not-nls", }, { "choice", "comment-segment", "alias", "block-comment", "line-comment", }, { - "quantifier", "wss", "alias", "ws", "0", "-1", + "sequence", "wss", "alias", "ws:0:-1", }, { - "quantifier", "optional-nl", "alias", "nl", "0", "1", + "sequence", "optional-nl", "alias", "nl:0:1", }, { "choice", "ws-no-nl", @@ -81,7 +81,7 @@ var bootDefinitions = [][]string{{ "ws-no-nl", "comment-segment", }, { - "quantifier", "continue-comment", "alias", "continue-comment-segment", "0", "-1", + "sequence", "continue-comment", "alias", "continue-comment-segment:0:-1", }, { "sequence", "comment", @@ -91,7 +91,7 @@ var bootDefinitions = [][]string{{ }, { "choice", "wsc", "alias", "ws", "comment", }, { - "quantifier", "wscs", "alias", "wsc", "0", "-1", + "sequence", "wscs", "alias", "wsc:0:-1", }, { "anything", "anything", "alias", }, { @@ -105,7 +105,7 @@ var bootDefinitions = [][]string{{ }, { "chars", "dash", "alias", "-", }, { - "quantifier", "optional-class-not", "alias", "class-not", "0", "1", + "sequence", "optional-class-not", "alias", "class-not:0:1", }, { "class", "not-class-control", "alias", "^\\\\\\[\\]\\^\\-", }, { @@ -119,7 +119,7 @@ var bootDefinitions = [][]string{{ }, { "choice", "char-or-range", "alias", "class-char", "char-range", }, { - "quantifier", "chars-or-ranges", "alias", "char-or-range", "0", "-1", + "sequence", "chars-or-ranges", "alias", "char-or-range:0:-1", }, { "sequence", "char-class", "none", "open-square", "optional-class-not", "chars-or-ranges", "close-square", }, { @@ -129,7 +129,7 @@ var bootDefinitions = [][]string{{ }, { "choice", "sequence-char", "none", "not-char-sequence-control", "escaped-char", }, { - "quantifier", "char-sequence-chars", "alias", "sequence-char", "0", "-1", + "sequence", "char-sequence-chars", "alias", "sequence-char:0:-1", }, { "sequence", "char-sequence", "none", "double-quote", "char-sequence-chars", "double-quote", }, { @@ -137,7 +137,7 @@ var bootDefinitions = [][]string{{ }, { "class", "symbol-char", "alias", "^\\\\ \\n\\t\\b\\f\\r\\v\\b/.\\[\\]\\\"{}\\^+*?|():=;", }, { - "quantifier", "symbol-chars", "alias", "symbol-char", "1", "-1", + "sequence", "symbol-chars", "alias", "symbol-char:1:-1", }, { "sequence", "symbol", "none", "symbol-chars", }, { @@ -153,7 +153,7 @@ var bootDefinitions = [][]string{{ }, { "class", "digit", "alias", "0-9", }, { - "quantifier", "number", "alias", "digit", "1", "-1", + "sequence", "number", "alias", "digit:1:-1", }, { "sequence", "count", "none", "number", }, { @@ -194,23 +194,23 @@ var bootDefinitions = [][]string{{ }, { "choice", "quantifiable", "alias", "terminal", "symbol", "group", }, { - "sequence", "quantifier", "none", "quantifiable", "wscs", "quantity", + "choice", "item-choice", "alias", "terminal", "symbol", "group", }, { - "choice", "item", "alias", "terminal", "symbol", "group", "quantifier", + "sequence", "item", "none", "item-choice", "quantity:0:1", }, { "sequence", "item-continue", "alias", "wscs", "item", }, { - "quantifier", "items-continue", "alias", "item-continue", "0", "-1", + "sequence", "items-continue", "alias", "item-continue:0:-1", }, { "sequence", "sequence", "none", "item", "items-continue", }, { - "choice", "element", "alias", "terminal", "symbol", "group", "quantifier", "sequence", + "choice", "element", "alias", "terminal", "symbol", "group", "sequence", }, { "chars", "pipe", "alias", "|", }, { "sequence", "element-continue", "alias", "wscs", "pipe", "wscs", "element", }, { - "quantifier", "elements-continue", "alias", "element-continue", "1", "-1", + "sequence", "elements-continue", "alias", "element-continue:1:-1", }, { "sequence", "choice", "none", "element", "elements-continue", }, { @@ -220,7 +220,6 @@ var bootDefinitions = [][]string{{ "terminal", "symbol", "group", - "quantifier", "sequence", "choice", }, { @@ -236,7 +235,7 @@ var bootDefinitions = [][]string{{ }, { "sequence", "flag-tag", "alias", "colon", "flag", }, { - "quantifier", "flags", "alias", "flag-tag", "0", "-1", + "sequence", "flags", "alias", "flag-tag:0:-1", }, { "chars", "equal", "alias", "=", }, { @@ -246,7 +245,7 @@ var bootDefinitions = [][]string{{ }, { "choice", "wsc-or-semicolon", "alias", "wsc", "semicolon", }, { - "quantifier", "wsc-or-semicolons", "alias", "wsc-or-semicolon", "0", "-1", + "sequence", "wsc-or-semicolons", "alias", "wsc-or-semicolon:0:-1", }, { "sequence", "subsequent-definition", @@ -256,12 +255,10 @@ var bootDefinitions = [][]string{{ "wsc-or-semicolons", "definition", }, { - "quantifier", + "sequence", "subsequent-definitions", "alias", - "subsequent-definition", - "0", - "-1", + "subsequent-definition:0:-1", }, { "sequence", "definitions", @@ -269,12 +266,10 @@ var bootDefinitions = [][]string{{ "definition", "subsequent-definitions", }, { - "quantifier", + "sequence", "opt-definitions", "alias", - "definitions", - "0", - "1", + "definitions:0:1", }, { "sequence", "syntax", diff --git a/char.go b/char.go index 5f61636..2d777eb 100644 --- a/char.go +++ b/char.go @@ -1,9 +1,10 @@ package parse +// TODO: rename to token + type charParser struct { name string commit CommitType - any bool not bool chars []rune ranges [][]rune @@ -13,14 +14,13 @@ type charParser struct { func newChar( name string, ct CommitType, - any, not bool, + not bool, chars []rune, ranges [][]rune, ) *charParser { return &charParser{ name: name, commit: ct, - any: any, not: not, chars: chars, ranges: ranges, @@ -31,7 +31,11 @@ func (p *charParser) nodeName() string { return p.name } func (p *charParser) parser(r *registry, path []string) (parser, error) { if stringsContain(path, p.name) { - panic(errCannotIncludeParsers) + panic(cannotIncludeParsers(p.name)) + } + + if _, ok := r.parser(p.name); ok { + return p, nil } r.setParser(p) @@ -42,23 +46,19 @@ func (p *charParser) commitType() CommitType { return p.commit } -func (p *charParser) setIncludedBy(i parser, path []string) { +func (p *charParser) setIncludedBy(including parser, path []string) { if stringsContain(path, p.name) { - panic(errCannotIncludeParsers) + panic(cannotIncludeParsers(p.name)) } - p.includedBy = append(p.includedBy, i) + p.includedBy = append(p.includedBy, including) } func (p *charParser) cacheIncluded(*context, *Node) { - panic(errCannotIncludeParsers) + panic(cannotIncludeParsers(p.name)) } func (p *charParser) match(t rune) bool { - if p.any { - return true - } - for _, ci := range p.chars { if ci == t { return !p.not @@ -93,8 +93,8 @@ func (p *charParser) parse(t Trace, c *context) { t.Out1("success", string(tok)) n := newNode(p.name, p.commit, c.offset, c.offset+1) c.cache.set(c.offset, p.name, n) - for _, i := range p.includedBy { - i.cacheIncluded(c, n) + for _, including := range p.includedBy { + including.cacheIncluded(c, n) } c.success(n) diff --git a/define.go b/define.go index f11f6de..c382788 100644 --- a/define.go +++ b/define.go @@ -106,19 +106,25 @@ func nodeChar(n *Node) rune { return toRune(s) } +func defineMember(s *Syntax, defaultName string, n *Node) (string, error) { + switch n.Name { + case "symbol": + return n.Text(), nil + default: + return defaultName, defineExpression(s, defaultName, Alias, n) + } +} + func defineMembers(s *Syntax, name string, n ...*Node) ([]string, error) { var refs []string for i, ni := range n { nmi := childName(name, i) - switch ni.Name { - case "symbol": - refs = append(refs, ni.Text()) - default: - refs = append(refs, nmi) - if err := defineExpression(s, nmi, Alias, ni); err != nil { - return nil, err - } + ref, err := defineMember(s, nmi, ni) + if err != nil { + return nil, err } + + refs = append(refs, ref) } return refs, nil @@ -156,38 +162,33 @@ func defineCharSequence(s *Syntax, name string, ct CommitType, charNodes []*Node return s.CharSequence(name, ct, chars) } -func defineQuantifier(s *Syntax, name string, ct CommitType, n *Node, q *Node) error { - refs, err := defineMembers(s, name, n) - if err != nil { - return err - } - - var min, max int - switch q.Name { +func getQuantity(n *Node) (min int, max int, err error) { + switch n.Name { case "count-quantifier": - min, err = strconv.Atoi(q.Nodes[0].Text()) + min, err = strconv.Atoi(n.Nodes[0].Text()) if err != nil { - return err + return } max = min case "range-quantifier": min = 0 max = -1 - for _, rq := range q.Nodes { + for _, rq := range n.Nodes { switch rq.Name { case "range-from": min, err = strconv.Atoi(rq.Text()) if err != nil { - return err + return } case "range-to": max, err = strconv.Atoi(rq.Text()) if err != nil { - return err + return } default: - return ErrInvalidSyntax + err = ErrInvalidSyntax + return } } case "one-or-more": @@ -198,23 +199,42 @@ func defineQuantifier(s *Syntax, name string, ct CommitType, n *Node, q *Node) e min, max = 0, 1 } - return s.Quantifier(name, ct, refs[0], min, max) + return +} + +func defineSymbol(s *Syntax, name string, ct CommitType, n *Node) error { + return s.Sequence(name, ct, SequenceItem{Name: n.Text()}) } func defineSequence(s *Syntax, name string, ct CommitType, n ...*Node) error { - refs, err := defineMembers(s, name, n...) - if err != nil { - return err + var items []SequenceItem + for i, ni := range n { + if ni.Name != "item" || len(ni.Nodes) == 0 { + return ErrInvalidSyntax + } + + var ( + item SequenceItem + err error + ) + + defaultName := childName(name, i) + item.Name, err = defineMember(s, defaultName, ni.Nodes[0]) + if err != nil { + return err + } + + if len(ni.Nodes) == 2 { + item.Min, item.Max, err = getQuantity(ni.Nodes[1]) + if err != nil { + return err + } + } + + items = append(items, item) } - // // TODO: try to make this expressed in the syntax (maybe as sequences need either a quantififer or not - // // one item? or by maintaining the excluded and caching in the sequence in a similar way when there is - // // only one item?) how does this effect the quantifiers? - // if len(refs) == 1 { - // return s.Choice(name, ct, refs[0]) - // } - - return s.Sequence(name, ct, refs...) + return s.Sequence(name, ct, items...) } func defineChoice(s *Syntax, name string, ct CommitType, n ...*Node) error { @@ -236,9 +256,7 @@ func defineExpression(s *Syntax, name string, ct CommitType, expression *Node) e case "char-sequence": err = defineCharSequence(s, name, ct, expression.Nodes) case "symbol": - err = defineSequence(s, name, ct, expression) - case "quantifier": - err = defineQuantifier(s, name, ct, expression.Nodes[0], expression.Nodes[1]) + err = defineSymbol(s, name, ct, expression) case "sequence": err = defineSequence(s, name, ct, expression.Nodes...) case "choice": diff --git a/parse.go b/parse.go index 4929ab2..16110e1 100644 --- a/parse.go +++ b/parse.go @@ -1,9 +1,6 @@ package parse -import ( - "errors" - "fmt" -) +import "fmt" type definition interface { nodeName() string @@ -18,12 +15,14 @@ type parser interface { parse(Trace, *context) } -var errCannotIncludeParsers = errors.New("cannot include parsers") - func parserNotFound(name string) error { return fmt.Errorf("parser not found: %s", name) } +func cannotIncludeParsers(name string) error { + return fmt.Errorf("parser: %s cannot include other parsers", name) +} + func stringsContain(ss []string, s string) bool { for _, si := range ss { if si == s { diff --git a/parse_test.go b/parse_test.go index 6439f75..a94b805 100644 --- a/parse_test.go +++ b/parse_test.go @@ -358,9 +358,10 @@ func TestSequence(t *testing.T) { }}, ) - testString( + testStringTrace( t, `A = "a" | (A?)*`, + 1, []testItem{{ msg: "sequence in choice with redundant quantifier", text: "aaa", @@ -397,15 +398,15 @@ func TestQuantifiers(t *testing.T) { t, `A = "a" "b"{0} "a"`, []testItem{{ - msg: "zero", - text: "aa", + msg: "zero, considered as one", + text: "aba", node: &Node{ Name: "A", - to: 2, + to: 3, }, }, { msg: "zero, fail", - text: "aba", + text: "aa", fail: true, }}, ) @@ -642,10 +643,9 @@ func TestQuantifiers(t *testing.T) { }}, ) - testStringTrace( + testString( t, `A = "a" "b"{0,} "a"`, - 1, []testItem{{ msg: "zero or more, explicit, missing", text: "aa", @@ -663,10 +663,9 @@ func TestQuantifiers(t *testing.T) { }}, ) - testStringTrace( + testString( t, `A = "a" "b"* "a"`, - 1, []testItem{{ msg: "zero or more, shortcut, missing", text: "aa", @@ -684,10 +683,9 @@ func TestQuantifiers(t *testing.T) { }}, ) - testStringTrace( + testString( t, `A = "a" "b"{1,} "a"`, - 1, []testItem{{ msg: "one or more, explicit, missing", text: "aa", @@ -702,10 +700,9 @@ func TestQuantifiers(t *testing.T) { }}, ) - testStringTrace( + testString( t, `A = "a" "b"+ "a"`, - 1, []testItem{{ msg: "one or more, shortcut, missing", text: "aa", @@ -720,10 +717,9 @@ func TestQuantifiers(t *testing.T) { }}, ) - testStringTrace( + testString( t, `A = "a" "b"{3,} "a"`, - 1, []testItem{{ msg: "three or more, explicit, missing", text: "abba", diff --git a/quantifier.go b/quantifier.go deleted file mode 100644 index e18606c..0000000 --- a/quantifier.go +++ /dev/null @@ -1,172 +0,0 @@ -package parse - -type quantifierDefinition struct { - name string - commit CommitType - min, max int - item string -} - -type quantifierParser struct { - name string - commit CommitType - min, max int - item parser - includedBy []parser -} - -func newQuantifier(name string, ct CommitType, item string, min, max int) *quantifierDefinition { - return &quantifierDefinition{ - name: name, - commit: ct, - min: min, - max: max, - item: item, - } -} - -func (d *quantifierDefinition) nodeName() string { return d.name } - -func (d *quantifierDefinition) parser(r *registry, path []string) (parser, error) { - if stringsContain(path, d.name) { - panic(errCannotIncludeParsers) - } - - p, ok := r.parser(d.name) - if ok { - return p, nil - } - - qp := &quantifierParser{ - name: d.name, - commit: d.commit, - min: d.min, - max: d.max, - } - - r.setParser(qp) - - item, ok := r.parser(d.item) - if !ok { - itemDefinition, ok := r.definition(d.item) - if !ok { - return nil, parserNotFound(d.item) - } - - var err error - item, err = itemDefinition.parser(r, path) - if err != nil { - return nil, err - } - } - - qp.item = item - return qp, nil -} - -func (d *quantifierDefinition) commitType() CommitType { return d.commit } -func (p *quantifierParser) nodeName() string { return p.name } - -// TODO: merge the quantifier into the sequence -// DOC: sequences are hungry and are not revisited, a*a cannot match anything. -// DOC: how to match a tailing a? (..)*a | .(..)*a - -func (p *quantifierParser) setIncludedBy(i parser, path []string) { - if stringsContain(path, p.name) { - panic(errCannotIncludeParsers) - } - - p.includedBy = append(p.includedBy, i) -} - -func (p *quantifierParser) cacheIncluded(*context, *Node) { - panic(errCannotIncludeParsers) -} - -func (p *quantifierParser) parse(t Trace, c *context) { - t = t.Extend(p.name) - t.Out1("parsing quantifier", c.offset) - - if p.commit&Documentation != 0 { - t.Out1("fail, doc") - c.fail(c.offset) - return - } - - if c.excluded(c.offset, p.name) { - t.Out1("excluded") - c.fail(c.offset) - return - } - - c.exclude(c.offset, p.name) - defer c.include(c.offset, p.name) - - node := newNode(p.name, p.commit, c.offset, c.offset) - - // this way of checking the cache definitely needs the testing of the russ cox form - for { - if p.max >= 0 && node.nodeLength() == p.max { - t.Out1("success, max reached") - c.cache.set(node.from, p.name, node) - for _, i := range p.includedBy { - i.cacheIncluded(c, node) - } - - c.success(node) - return - } - - t.Out2("next quantifier item") - - // n, m, ok := c.cache.get(c.offset, p.item.nodeName()) - m, ok := c.fromCache(p.item.nodeName()) - if ok { - t.Out1("quantifier item found in cache, match:", m, c.offset, c.node.tokenLength()) - if m { - node.append(c.node) - if c.node.tokenLength() > 0 { - t.Out2("taking next after cached found") - continue - } - } - - if node.nodeLength() >= p.min { - t.Out1("success, no more match") - c.cache.set(node.from, p.name, node) - for _, i := range p.includedBy { - i.cacheIncluded(c, node) - } - - c.success(node) - } else { - t.Out1("fail, min not reached") - c.cache.set(node.from, p.name, nil) - c.fail(node.from) - } - - return - } - - p.item.parse(t, c) - if !c.match || c.node.tokenLength() == 0 { - if node.nodeLength() >= p.min { - t.Out1("success, no more match") - c.cache.set(node.from, p.name, node) - for _, i := range p.includedBy { - i.cacheIncluded(c, node) - } - - c.success(node) - } else { - t.Out1("fail, min not reached") - c.cache.set(node.from, p.name, nil) - c.fail(node.from) - } - - return - } - - node.append(c.node) - } -} diff --git a/sequence.go b/sequence.go index 0539e62..2067d89 100644 --- a/sequence.go +++ b/sequence.go @@ -3,17 +3,18 @@ package parse type sequenceDefinition struct { name string commit CommitType - items []string + items []SequenceItem } type sequenceParser struct { name string commit CommitType items []parser + ranges [][]int including []parser } -func newSequence(name string, ct CommitType, items []string) *sequenceDefinition { +func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefinition { return &sequenceDefinition{ name: name, commit: ct, @@ -25,7 +26,7 @@ func (d *sequenceDefinition) nodeName() string { return d.name } func (d *sequenceDefinition) parser(r *registry, path []string) (parser, error) { if stringsContain(path, d.name) { - panic(errCannotIncludeParsers) + panic(cannotIncludeParsers(d.name)) } p, ok := r.parser(d.name) @@ -40,34 +41,47 @@ func (d *sequenceDefinition) parser(r *registry, path []string) (parser, error) r.setParser(sp) - var items []parser + var ( + items []parser + ranges [][]int + ) + path = append(path, d.name) - for _, name := range d.items { - item, ok := r.parser(name) + for _, item := range d.items { + if item.Min == 0 && item.Max == 0 { + item.Min, item.Max = 1, 1 + } else if item.Max == 0 { + item.Max = -1 + } + + pi, ok := r.parser(item.Name) if ok { - items = append(items, item) + items = append(items, pi) + ranges = append(ranges, []int{item.Min, item.Max}) continue } - itemDefinition, ok := r.definition(name) + itemDefinition, ok := r.definition(item.Name) if !ok { - return nil, parserNotFound(name) + return nil, parserNotFound(item.Name) } - item, err := itemDefinition.parser(r, path) + pi, err := itemDefinition.parser(r, path) if err != nil { return nil, err } - items = append(items, item) + items = append(items, pi) + ranges = append(ranges, []int{item.Min, item.Max}) } // for single items, acts like a choice - if len(items) == 1 { + if len(items) == 1 && ranges[0][0] == 1 && ranges[0][1] == 1 { items[0].setIncludedBy(sp, path) } sp.items = items + sp.ranges = ranges return sp, nil } @@ -112,6 +126,8 @@ e = b | d input: 111 */ +// TODO: apply the quantifier migration to the syntax + func (p *sequenceParser) parse(t Trace, c *context) { t = t.Extend(p.name) t.Out1("parsing sequence", c.offset) @@ -122,8 +138,6 @@ func (p *sequenceParser) parse(t Trace, c *context) { return } - // TODO: maybe we can check the cache here? no because that would exclude the continuations - if c.excluded(c.offset, p.name) { t.Out1("excluded") c.fail(c.offset) @@ -134,54 +148,75 @@ func (p *sequenceParser) parse(t Trace, c *context) { defer c.include(c.offset, p.name) items := p.items + ranges := p.ranges + var currentCount int node := newNode(p.name, p.commit, c.offset, c.offset) for len(items) > 0 { - t.Out2("next sequence item") - // n, m, ok := c.cache.get(c.offset, items[0].nodeName()) m, ok := c.fromCache(items[0].nodeName()) if ok { t.Out1("sequence item found in cache, match:", m, items[0].nodeName(), c.offset) if m { - t.Out2("sequence item from cache:", c.node.Name, len(c.node.Nodes), c.node.from) - node.append(c.node) - items = items[1:] + if c.node.tokenLength() > 0 { + node.append(c.node) + currentCount++ + } + + if c.node.tokenLength() == 0 || ranges[0][1] >= 0 && currentCount == ranges[0][1] { + items = items[1:] + ranges = ranges[1:] + currentCount = 0 + } + continue } - c.cache.set(node.from, p.name, nil) - c.fail(node.from) - return + if currentCount < ranges[0][0] { + c.cache.set(node.from, p.name, nil) + c.fail(node.from) + return + } + + items = items[1:] + ranges = ranges[1:] + currentCount = 0 + continue } items[0].parse(t, c) - items = items[1:] if !c.match { - t.Out1("fail, item failed") - c.cache.set(node.from, p.name, nil) - c.fail(node.from) - return + if currentCount < ranges[0][0] { + t.Out1("fail, item failed") + c.cache.set(node.from, p.name, nil) + c.fail(node.from) + return + } + + items = items[1:] + ranges = ranges[1:] + currentCount = 0 + continue } if c.node.tokenLength() > 0 { - t.Out2("appending sequence item", c.node.Name, len(c.node.Nodes)) node.append(c.node) + currentCount++ + } + + if c.node.tokenLength() == 0 || ranges[0][1] >= 0 && currentCount == ranges[0][1] { + items = items[1:] + ranges = ranges[1:] + currentCount = 0 } } t.Out1("success, items parsed") - t.Out2("nodes", node.nodeLength()) - if node.Name == "group" { - t.Out2("caching group", node.from, node.Nodes[2].Name, node.Nodes[2].nodeLength()) - } - // is this cached item ever taken? c.cache.set(node.from, p.name, node) for _, i := range p.including { i.cacheIncluded(c, node) } - t.Out2("caching sequence and included by done") c.success(node) } diff --git a/syntax.go b/syntax.go index aa44626..8872dc6 100644 --- a/syntax.go +++ b/syntax.go @@ -16,6 +16,11 @@ const ( Root ) +type SequenceItem struct { + Name string + Min, Max int // 0,0 considered as 1,1, x,0 considered as x,-1 +} + type Syntax struct { trace Trace registry *registry @@ -68,11 +73,11 @@ func (s *Syntax) register(d definition) error { } func (s *Syntax) AnyChar(name string, ct CommitType) error { - return s.register(newChar(name, ct, true, false, nil, nil)) + return s.Class(name, ct, true, nil, nil) } func (s *Syntax) Class(name string, ct CommitType, not bool, chars []rune, ranges [][]rune) error { - return s.register(newChar(name, ct, false, not, chars, ranges)) + return s.register(newChar(name, ct, not, chars, ranges)) } func childName(name string, childIndex int) string { @@ -84,19 +89,15 @@ func (s *Syntax) CharSequence(name string, ct CommitType, chars []rune) error { for i, ci := range chars { ref := childName(name, i) refs = append(refs, ref) - if err := s.register(newChar(ref, Alias, false, false, []rune{ci}, nil)); err != nil { + if err := s.register(newChar(ref, Alias, false, []rune{ci}, nil)); err != nil { return err } } - return s.Sequence(name, ct, refs...) + return s.Sequence(name, ct, namesToSequenceItems(refs)...) } -func (s *Syntax) Quantifier(name string, ct CommitType, item string, min, max int) error { - return s.register(newQuantifier(name, ct, item, min, max)) -} - -func (s *Syntax) Sequence(name string, ct CommitType, items ...string) error { +func (s *Syntax) Sequence(name string, ct CommitType, items ...SequenceItem) error { return s.register(newSequence(name, ct, items)) } diff --git a/syntax.p b/syntax.p index 87a483a..d5d454f 100644 --- a/syntax.p +++ b/syntax.p @@ -49,22 +49,19 @@ quantity:alias = count-quantifier | zero-or-more | zero-or-one; -quantifier = (terminal | symbol | group) wsc* quantity; +item = (terminal | symbol | group) quantity?; +sequence = item (wsc* item)*; // TODO: why was this '+'? -item:alias = terminal | symbol | group | quantifier; -sequence = item (wsc* item)+; - -element:alias = terminal | symbol | group | quantifier | sequence; +element:alias = terminal | symbol | group | sequence; // DOC: once cached, doesn't try again, even in a new context, therefore the order may matter -choice = element (wsc* "|" wsc* element)+; +choice = element (wsc* "|" wsc* element)+; // DOC: not having 'not' needs some tricks sometimes expression:alias = terminal | symbol | group - | quantifier | sequence | choice;