refactor parse phase - parsers

This commit is contained in:
Arpad Ryszka 2017-11-02 22:19:03 +01:00
parent 0efa15656d
commit 77c3356427
7 changed files with 141 additions and 170 deletions

View File

@ -204,15 +204,15 @@ var bootSyntaxDefs = [][]string{{
}, { }, {
"sequence", "sequence", "none", "item", "items-continue", "sequence", "sequence", "none", "item", "items-continue",
}, { }, {
"choice", "element", "alias", "terminal", "symbol", "group", "sequence", "choice", "option", "alias", "terminal", "symbol", "group", "sequence",
}, { }, {
"chars", "pipe", "alias", "|", "chars", "pipe", "alias", "|",
}, { }, {
"sequence", "element-continue", "alias", "wscs", "pipe", "wscs", "element", "sequence", "option-continue", "alias", "wscs", "pipe", "wscs", "option",
}, { }, {
"sequence", "elements-continue", "alias", "element-continue:1:-1", "sequence", "options-continue", "alias", "option-continue:1:-1",
}, { }, {
"sequence", "choice", "none", "element", "elements-continue", "sequence", "choice", "none", "option", "options-continue",
}, { }, {
"choice", "choice",
"expression", "expression",

22
char.go
View File

@ -1,12 +1,12 @@
package treerack package treerack
type charParser struct { type charParser struct {
name string name string
id int id int
not bool not bool
chars []rune chars []rune
ranges [][]rune ranges [][]rune
includedBy []int generalizations []int
} }
func newChar( func newChar(
@ -32,12 +32,12 @@ func (p *charParser) setCommitType(ct CommitType) {}
func (p *charParser) validate(*registry) error { return nil } func (p *charParser) validate(*registry) error { return nil }
func (p *charParser) init(*registry) {} func (p *charParser) init(*registry) {}
func (p *charParser) setIncludedBy(includedBy int) { func (p *charParser) addGeneralization(g int) {
if intsContain(p.includedBy, includedBy) { if intsContain(p.generalizations, g) {
return return
} }
p.includedBy = append(p.includedBy, includedBy) p.generalizations = append(p.generalizations, g)
} }
func (p *charParser) parser() parser { return p } func (p *charParser) parser() parser { return p }
@ -69,10 +69,6 @@ func (p *charParser) parse(c *context) {
return return
} }
for _, includedBy := range p.includedBy {
c.results.setMatch(c.offset, includedBy, c.offset+1)
}
c.success(c.offset + 1) c.success(c.offset + 1)
} }

130
choice.go
View File

@ -1,39 +1,38 @@
package treerack package treerack
type choiceDefinition struct { type choiceDefinition struct {
name string name string
id int id int
commit CommitType commit CommitType
elements []string options []string
elementDefs []definition optionDefs []definition
includedBy []int generalizations []int
cbuilder *choiceBuilder cparser *choiceParser
cparser *choiceParser cbuilder *choiceBuilder
validated bool validated bool
initialized bool initialized bool
} }
type choiceParser struct { type choiceParser struct {
name string name string
id int id int
commit CommitType commit CommitType
elements []parser options []parser
includedBy []int generalizations []int
} }
type choiceBuilder struct { type choiceBuilder struct {
name string name string
id int id int
commit CommitType commit CommitType
elements []builder options []builder
includedBy *idSet
} }
func newChoice(name string, ct CommitType, elements []string) *choiceDefinition { func newChoice(name string, ct CommitType, options []string) *choiceDefinition {
return &choiceDefinition{ return &choiceDefinition{
name: name, name: name,
commit: ct, commit: ct,
elements: elements, options: options,
} }
} }
@ -50,10 +49,10 @@ func (d *choiceDefinition) validate(r *registry) error {
} }
d.validated = true d.validated = true
for i := range d.elements { for i := range d.options {
e, ok := r.definitions[d.elements[i]] e, ok := r.definitions[d.options[i]]
if !ok { if !ok {
return parserNotFound(d.elements[i]) return parserNotFound(d.options[i])
} }
if err := e.validate(r); err != nil { if err := e.validate(r); err != nil {
@ -70,20 +69,19 @@ func (d *choiceDefinition) ensureBuilder() {
} }
d.cbuilder = &choiceBuilder{ d.cbuilder = &choiceBuilder{
name: d.name, name: d.name,
id: d.id, id: d.id,
commit: d.commit, commit: d.commit,
includedBy: &idSet{},
} }
} }
func (d *choiceDefinition) initElements(r *registry) { func (d *choiceDefinition) initOptions(r *registry) {
for _, e := range d.elements { for _, e := range d.options {
def := r.definitions[e] def := r.definitions[e]
d.elementDefs = append(d.elementDefs, def) d.optionDefs = append(d.optionDefs, def)
def.init(r) def.init(r)
d.cbuilder.elements = append(d.cbuilder.elements, def.builder()) d.cbuilder.options = append(d.cbuilder.options, def.builder())
def.setIncludedBy(d.id) def.addGeneralization(d.id)
} }
} }
@ -94,35 +92,34 @@ func (d *choiceDefinition) init(r *registry) {
d.initialized = true d.initialized = true
d.ensureBuilder() d.ensureBuilder()
d.initElements(r) d.initOptions(r)
} }
func (d *choiceDefinition) setIncludedBy(includedBy int) { func (d *choiceDefinition) addGeneralization(g int) {
if intsContain(d.includedBy, includedBy) { if intsContain(d.generalizations, g) {
return return
} }
d.includedBy = append(d.includedBy, includedBy) d.generalizations = append(d.generalizations, g)
d.ensureBuilder() d.ensureBuilder()
d.cbuilder.includedBy.set(includedBy) for _, e := range d.optionDefs {
for _, e := range d.elementDefs { e.addGeneralization(g)
e.setIncludedBy(includedBy)
} }
} }
func (d *choiceDefinition) createParser() { func (d *choiceDefinition) createParser() {
d.cparser = &choiceParser{ d.cparser = &choiceParser{
name: d.name, name: d.name,
id: d.id, id: d.id,
commit: d.commit, commit: d.commit,
includedBy: d.includedBy, generalizations: d.generalizations,
} }
} }
func (d *choiceDefinition) createElementParsers() { func (d *choiceDefinition) createOptionParsers() {
for _, def := range d.elementDefs { for _, def := range d.optionDefs {
element := def.parser() option := def.parser()
d.cparser.elements = append(d.cparser.elements, element) d.cparser.options = append(d.cparser.options, option)
} }
} }
@ -132,13 +129,14 @@ func (d *choiceDefinition) parser() parser {
} }
d.createParser() d.createParser()
d.createElementParsers() d.createOptionParsers()
return d.cparser return d.cparser
} }
func (d *choiceDefinition) builder() builder { return d.cbuilder } func (d *choiceDefinition) builder() builder { return d.cbuilder }
func (p *choiceParser) nodeName() string { return p.name }
func (p *choiceParser) nodeID() int { return p.id } func (p *choiceParser) nodeName() string { return p.name }
func (p *choiceParser) nodeID() int { return p.id }
func (p *choiceParser) parse(c *context) { func (p *choiceParser) parse(c *context) {
if c.fromResults(p.id) { if c.fromResults(p.id) {
@ -155,21 +153,21 @@ func (p *choiceParser) parse(c *context) {
to := c.offset to := c.offset
var match bool var match bool
var elementIndex int var optionIndex int
var foundMatch bool var foundMatch bool
for { for {
foundMatch = false foundMatch = false
elementIndex = 0 optionIndex = 0
// TODO: // TODO:
// - avoid double parsing by setting first-from-store in the context, prepare in advance to // - avoid double parsing by setting first-from-store in the context, prepare in advance to
// know whether it can be its own item // know whether it can be its own item
// - it is also important to figure out why disabling the failed elements breaks the parsing // - it is also important to figure out why disabling the failed options breaks the parsing
for elementIndex < len(p.elements) { for optionIndex < len(p.options) {
p.elements[elementIndex].parse(c) p.options[optionIndex].parse(c)
elementIndex++ optionIndex++
if !c.matchLast || match && c.offset <= to { if !c.matchLast || match && c.offset <= to {
c.offset = from c.offset = from
@ -204,26 +202,26 @@ func (b *choiceBuilder) nodeName() string { return b.name }
func (b *choiceBuilder) nodeID() int { return b.id } func (b *choiceBuilder) nodeID() int { return b.id }
func (b *choiceBuilder) build(c *context) ([]*Node, bool) { func (b *choiceBuilder) build(c *context) ([]*Node, bool) {
to, ok := c.results.takeMatch(c.offset, b.id, b.includedBy) to, ok := c.results.takeMatch(c.offset, b.id)
if !ok { if !ok {
return nil, false return nil, false
} }
var element builder var option builder
for _, e := range b.elements { for _, e := range b.options {
if c.results.hasMatchTo(c.offset, e.nodeID(), to) { if c.results.hasMatchTo(c.offset, e.nodeID(), to) {
element = e option = e
break break
} }
} }
if element == nil { if option == nil {
panic("damaged parse result") panic("damaged parse result")
} }
from := c.offset from := c.offset
n, ok := element.build(c) n, ok := option.build(c)
if !ok { if !ok {
panic("damaged parse result") panic("damaged parse result")
} }

View File

@ -55,7 +55,7 @@ func (s *results) hasMatchTo(offset, id, to int) bool {
return false return false
} }
func (s *results) takeMatch(offset, id int, includedBy *idSet) (int, bool) { func (s *results) takeMatch(offset, id int) (int, bool) {
if len(s.match) <= offset { if len(s.match) <= offset {
return 0, false return 0, false
} }
@ -81,29 +81,11 @@ func (s *results) takeMatch(offset, id int, includedBy *idSet) (int, bool) {
if found && to-offset > 0 { if found && to-offset > 0 {
s.match[offset][index] = -1 s.match[offset][index] = -1
for i := 0; i < len(s.match[offset]); i += 2 {
if includedBy.has(s.match[offset][i]) && s.match[offset][i+1] == to {
s.match[offset][i] = -1
}
}
} }
return to, found return to, found
} }
func (s *results) takeMatchLength(offset, id, to int) {
if len(s.match) <= offset {
return
}
for i := 0; i < len(s.match[offset]); i += 2 {
if s.match[offset][i] == id && s.match[offset][i+1] == to {
s.match[offset][i] = -1
return
}
}
}
func (s *results) ensureOffset(offset int) { func (s *results) ensureOffset(offset int) {
if len(s.match) > offset { if len(s.match) > offset {
return return

View File

@ -1,38 +1,37 @@
package treerack package treerack
type sequenceDefinition struct { type sequenceDefinition struct {
name string name string
id int id int
commit CommitType commit CommitType
items []SequenceItem items []SequenceItem
itemDefs []definition itemDefs []definition
includedBy []int ranges [][]int
ranges [][]int generalizations []int
sbuilder *sequenceBuilder sbuilder *sequenceBuilder
sparser *sequenceParser sparser *sequenceParser
allChars bool allChars bool
validated bool validated bool
initialized bool initialized bool
} }
type sequenceParser struct { type sequenceParser struct {
name string name string
id int id int
commit CommitType commit CommitType
items []parser items []parser
ranges [][]int ranges [][]int
includedBy []int generalizations []int
allChars bool allChars bool
} }
type sequenceBuilder struct { type sequenceBuilder struct {
name string name string
id int id int
commit CommitType commit CommitType
items []builder items []builder
ranges [][]int ranges [][]int
includedBy *idSet allChars bool
allChars bool
} }
func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefinition { func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefinition {
@ -70,20 +69,15 @@ func (d *sequenceDefinition) validate(r *registry) error {
return nil return nil
} }
func (d *sequenceDefinition) includeItems() bool {
return len(d.items) == 1 && d.items[0].Max == 1
}
func (d *sequenceDefinition) ensureBuilder() { func (d *sequenceDefinition) ensureBuilder() {
if d.sbuilder != nil { if d.sbuilder != nil {
return return
} }
d.sbuilder = &sequenceBuilder{ d.sbuilder = &sequenceBuilder{
name: d.name, name: d.name,
id: d.id, id: d.id,
commit: d.commit, commit: d.commit,
includedBy: &idSet{},
} }
} }
@ -117,6 +111,10 @@ func (d *sequenceDefinition) initItems(r *registry) {
d.allChars = allChars d.allChars = allChars
} }
func (d *sequenceDefinition) canHaveSpecializations() bool {
return len(d.items) == 1 && d.items[0].Max == 1
}
func (d *sequenceDefinition) init(r *registry) { func (d *sequenceDefinition) init(r *registry) {
if d.initialized { if d.initialized {
return return
@ -127,32 +125,31 @@ func (d *sequenceDefinition) init(r *registry) {
d.ensureBuilder() d.ensureBuilder()
d.sbuilder.ranges = d.ranges d.sbuilder.ranges = d.ranges
d.initItems(r) d.initItems(r)
if d.includeItems() { if d.canHaveSpecializations() {
d.itemDefs[0].setIncludedBy(d.id) d.itemDefs[0].addGeneralization(d.id)
} }
} }
func (d *sequenceDefinition) setIncludedBy(includedBy int) { func (d *sequenceDefinition) addGeneralization(g int) {
if intsContain(d.includedBy, includedBy) { if intsContain(d.generalizations, g) {
return return
} }
d.includedBy = append(d.includedBy, includedBy) d.generalizations = append(d.generalizations, g)
d.ensureBuilder() d.ensureBuilder()
d.sbuilder.includedBy.set(includedBy) if d.canHaveSpecializations() {
if d.includeItems() { d.itemDefs[0].addGeneralization(g)
d.itemDefs[0].setIncludedBy(includedBy)
} }
} }
func (d *sequenceDefinition) createParser() { func (d *sequenceDefinition) createParser() {
d.sparser = &sequenceParser{ d.sparser = &sequenceParser{
name: d.name, name: d.name,
id: d.id, id: d.id,
commit: d.commit, commit: d.commit,
includedBy: d.includedBy, generalizations: d.generalizations,
allChars: d.allChars, allChars: d.allChars,
ranges: d.ranges, ranges: d.ranges,
} }
} }
@ -174,8 +171,9 @@ func (d *sequenceDefinition) parser() parser {
} }
func (d *sequenceDefinition) builder() builder { return d.sbuilder } func (d *sequenceDefinition) builder() builder { return d.sbuilder }
func (p *sequenceParser) nodeName() string { return p.name }
func (p *sequenceParser) nodeID() int { return p.id } func (p *sequenceParser) nodeName() string { return p.name }
func (p *sequenceParser) nodeID() int { return p.id }
func (p *sequenceParser) parse(c *context) { func (p *sequenceParser) parse(c *context) {
if !p.allChars { if !p.allChars {
@ -194,7 +192,9 @@ func (p *sequenceParser) parse(c *context) {
var parsed bool var parsed bool
for itemIndex < len(p.items) { for itemIndex < len(p.items) {
// TODO: is it ok to parse before max range check? what if max=0 // TODO:
// - is it ok to parse before max range check? what if max=0
// - validate, normalize and document max=0
p.items[itemIndex].parse(c) p.items[itemIndex].parse(c)
if !c.matchLast { if !c.matchLast {
if currentCount < p.ranges[itemIndex][0] { if currentCount < p.ranges[itemIndex][0] {
@ -219,23 +219,21 @@ func (p *sequenceParser) parse(c *context) {
to = c.offset to = c.offset
// TODO: max cannot be 0
if !parsed || p.ranges[itemIndex][1] >= 0 && currentCount == p.ranges[itemIndex][1] { if !parsed || p.ranges[itemIndex][1] >= 0 && currentCount == p.ranges[itemIndex][1] {
itemIndex++ itemIndex++
currentCount = 0 currentCount = 0
} }
} }
if !p.allChars { for _, g := range p.generalizations {
for _, includedBy := range p.includedBy { if c.pending(from, g) {
if c.pending(from, includedBy) { c.results.setMatch(from, g, to)
c.results.setMatch(from, includedBy, to)
}
} }
} }
c.results.setMatch(from, p.id, to) c.results.setMatch(from, p.id, to)
c.success(to) c.success(to)
if !p.allChars { if !p.allChars {
c.unmarkPending(from, p.id) c.unmarkPending(from, p.id)
} }
@ -245,12 +243,11 @@ func (b *sequenceBuilder) nodeName() string { return b.name }
func (b *sequenceBuilder) nodeID() int { return b.id } func (b *sequenceBuilder) nodeID() int { return b.id }
func (b *sequenceBuilder) build(c *context) ([]*Node, bool) { func (b *sequenceBuilder) build(c *context) ([]*Node, bool) {
to, ok := c.results.takeMatch(c.offset, b.id, b.includedBy) to, ok := c.results.takeMatch(c.offset, b.id)
if !ok { if !ok {
return nil, false return nil, false
} }
// maybe something like this:
if to-c.offset == 0 && b.commit&Alias != 0 { if to-c.offset == 0 && b.commit&Alias != 0 {
return nil, true return nil, true
} }
@ -290,8 +287,6 @@ func (b *sequenceBuilder) build(c *context) ([]*Node, bool) {
continue continue
} }
// maybe can handle the commit type differently
parsed := c.offset > itemFrom parsed := c.offset > itemFrom
if parsed || len(n) > 0 { if parsed || len(n) > 0 {
nodes = append(nodes, n...) nodes = append(nodes, n...)

View File

@ -42,7 +42,7 @@ type definition interface {
setID(int) setID(int)
validate(*registry) error validate(*registry) error
init(*registry) init(*registry)
setIncludedBy(int) addGeneralization(int)
parser() parser parser() parser
builder() builder builder() builder
} }
@ -231,16 +231,16 @@ func (s *Syntax) Sequence(name string, ct CommitType, items ...SequenceItem) err
return s.sequence(name, ct, items...) return s.sequence(name, ct, items...)
} }
func (s *Syntax) choice(name string, ct CommitType, elements ...string) error { func (s *Syntax) choice(name string, ct CommitType, options ...string) error {
return s.register(newChoice(name, ct, elements)) return s.register(newChoice(name, ct, options))
} }
func (s *Syntax) Choice(name string, ct CommitType, elements ...string) error { func (s *Syntax) Choice(name string, ct CommitType, options ...string) error {
if !isValidSymbol(name) { if !isValidSymbol(name) {
return ErrInvalidSymbolName return ErrInvalidSymbolName
} }
return s.choice(name, ct, elements...) return s.choice(name, ct, options...)
} }
func (s *Syntax) Read(r io.Reader) error { func (s *Syntax) Read(r io.Reader) error {

View File

@ -43,10 +43,10 @@ quantity:alias = count-quantifier
item:nows = (terminal | symbol | group) quantity?; item:nows = (terminal | symbol | group) quantity?;
sequence = item+; sequence = item+;
element:alias = terminal | symbol | group | sequence; option:alias = terminal | symbol | group | sequence;
// DOC: how the order matters // DOC: how the order matters
choice = element ("|" element)+; choice = option ("|" option)+;
// DOC: not having 'not' needs some tricks sometimes // DOC: not having 'not' needs some tricks sometimes