refactor parse phase - parsers

This commit is contained in:
Arpad Ryszka 2017-11-02 22:19:03 +01:00
parent 0efa15656d
commit 77c3356427
7 changed files with 141 additions and 170 deletions

View File

@ -204,15 +204,15 @@ var bootSyntaxDefs = [][]string{{
}, {
"sequence", "sequence", "none", "item", "items-continue",
}, {
"choice", "element", "alias", "terminal", "symbol", "group", "sequence",
"choice", "option", "alias", "terminal", "symbol", "group", "sequence",
}, {
"chars", "pipe", "alias", "|",
}, {
"sequence", "element-continue", "alias", "wscs", "pipe", "wscs", "element",
"sequence", "option-continue", "alias", "wscs", "pipe", "wscs", "option",
}, {
"sequence", "elements-continue", "alias", "element-continue:1:-1",
"sequence", "options-continue", "alias", "option-continue:1:-1",
}, {
"sequence", "choice", "none", "element", "elements-continue",
"sequence", "choice", "none", "option", "options-continue",
}, {
"choice",
"expression",

22
char.go
View File

@ -1,12 +1,12 @@
package treerack
type charParser struct {
name string
id int
not bool
chars []rune
ranges [][]rune
includedBy []int
name string
id int
not bool
chars []rune
ranges [][]rune
generalizations []int
}
func newChar(
@ -32,12 +32,12 @@ func (p *charParser) setCommitType(ct CommitType) {}
// validate is a no-op for char parsers: the registry argument is ignored and
// the result is always nil.
func (p *charParser) validate(*registry) error { return nil }
// init is a no-op for char parsers: the registry argument is ignored and no
// state is changed.
func (p *charParser) init(*registry) {}
func (p *charParser) setIncludedBy(includedBy int) {
if intsContain(p.includedBy, includedBy) {
func (p *charParser) addGeneralization(g int) {
if intsContain(p.generalizations, g) {
return
}
p.includedBy = append(p.includedBy, includedBy)
p.generalizations = append(p.generalizations, g)
}
// parser returns the receiver itself; no separate parser object is created
// for a char parser.
func (p *charParser) parser() parser { return p }
@ -69,10 +69,6 @@ func (p *charParser) parse(c *context) {
return
}
for _, includedBy := range p.includedBy {
c.results.setMatch(c.offset, includedBy, c.offset+1)
}
c.success(c.offset + 1)
}

130
choice.go
View File

@ -1,39 +1,38 @@
package treerack
type choiceDefinition struct {
name string
id int
commit CommitType
elements []string
elementDefs []definition
includedBy []int
cbuilder *choiceBuilder
cparser *choiceParser
validated bool
initialized bool
name string
id int
commit CommitType
options []string
optionDefs []definition
generalizations []int
cparser *choiceParser
cbuilder *choiceBuilder
validated bool
initialized bool
}
type choiceParser struct {
name string
id int
commit CommitType
elements []parser
includedBy []int
name string
id int
commit CommitType
options []parser
generalizations []int
}
type choiceBuilder struct {
name string
id int
commit CommitType
elements []builder
includedBy *idSet
name string
id int
commit CommitType
options []builder
}
func newChoice(name string, ct CommitType, elements []string) *choiceDefinition {
func newChoice(name string, ct CommitType, options []string) *choiceDefinition {
return &choiceDefinition{
name: name,
commit: ct,
elements: elements,
name: name,
commit: ct,
options: options,
}
}
@ -50,10 +49,10 @@ func (d *choiceDefinition) validate(r *registry) error {
}
d.validated = true
for i := range d.elements {
e, ok := r.definitions[d.elements[i]]
for i := range d.options {
e, ok := r.definitions[d.options[i]]
if !ok {
return parserNotFound(d.elements[i])
return parserNotFound(d.options[i])
}
if err := e.validate(r); err != nil {
@ -70,20 +69,19 @@ func (d *choiceDefinition) ensureBuilder() {
}
d.cbuilder = &choiceBuilder{
name: d.name,
id: d.id,
commit: d.commit,
includedBy: &idSet{},
name: d.name,
id: d.id,
commit: d.commit,
}
}
func (d *choiceDefinition) initElements(r *registry) {
for _, e := range d.elements {
func (d *choiceDefinition) initOptions(r *registry) {
for _, e := range d.options {
def := r.definitions[e]
d.elementDefs = append(d.elementDefs, def)
d.optionDefs = append(d.optionDefs, def)
def.init(r)
d.cbuilder.elements = append(d.cbuilder.elements, def.builder())
def.setIncludedBy(d.id)
d.cbuilder.options = append(d.cbuilder.options, def.builder())
def.addGeneralization(d.id)
}
}
@ -94,35 +92,34 @@ func (d *choiceDefinition) init(r *registry) {
d.initialized = true
d.ensureBuilder()
d.initElements(r)
d.initOptions(r)
}
func (d *choiceDefinition) setIncludedBy(includedBy int) {
if intsContain(d.includedBy, includedBy) {
func (d *choiceDefinition) addGeneralization(g int) {
if intsContain(d.generalizations, g) {
return
}
d.includedBy = append(d.includedBy, includedBy)
d.generalizations = append(d.generalizations, g)
d.ensureBuilder()
d.cbuilder.includedBy.set(includedBy)
for _, e := range d.elementDefs {
e.setIncludedBy(includedBy)
for _, e := range d.optionDefs {
e.addGeneralization(g)
}
}
func (d *choiceDefinition) createParser() {
d.cparser = &choiceParser{
name: d.name,
id: d.id,
commit: d.commit,
includedBy: d.includedBy,
name: d.name,
id: d.id,
commit: d.commit,
generalizations: d.generalizations,
}
}
func (d *choiceDefinition) createElementParsers() {
for _, def := range d.elementDefs {
element := def.parser()
d.cparser.elements = append(d.cparser.elements, element)
func (d *choiceDefinition) createOptionParsers() {
for _, def := range d.optionDefs {
option := def.parser()
d.cparser.options = append(d.cparser.options, option)
}
}
@ -132,13 +129,14 @@ func (d *choiceDefinition) parser() parser {
}
d.createParser()
d.createElementParsers()
d.createOptionParsers()
return d.cparser
}
// builder returns d.cbuilder, the choice builder that ensureBuilder assigns.
// NOTE(review): presumably nil if ensureBuilder has not run yet — confirm that
// callers always initialize the definition first.
func (d *choiceDefinition) builder() builder { return d.cbuilder }
func (p *choiceParser) nodeName() string { return p.name }
func (p *choiceParser) nodeID() int { return p.id }
func (p *choiceParser) nodeName() string { return p.name }
func (p *choiceParser) nodeID() int { return p.id }
func (p *choiceParser) parse(c *context) {
if c.fromResults(p.id) {
@ -155,21 +153,21 @@ func (p *choiceParser) parse(c *context) {
to := c.offset
var match bool
var elementIndex int
var optionIndex int
var foundMatch bool
for {
foundMatch = false
elementIndex = 0
optionIndex = 0
// TODO:
// - avoid double parsing by setting first-from-store in the context, prepare in advance to
// know whether it can be its own item
// - it is also important to figure why disabling the failed elements breaks the parsing
// - it is also important to figure why disabling the failed options breaks the parsing
for elementIndex < len(p.elements) {
p.elements[elementIndex].parse(c)
elementIndex++
for optionIndex < len(p.options) {
p.options[optionIndex].parse(c)
optionIndex++
if !c.matchLast || match && c.offset <= to {
c.offset = from
@ -204,26 +202,26 @@ func (b *choiceBuilder) nodeName() string { return b.name }
// nodeID returns the numeric id stored in the builder.
func (b *choiceBuilder) nodeID() int { return b.id }
func (b *choiceBuilder) build(c *context) ([]*Node, bool) {
to, ok := c.results.takeMatch(c.offset, b.id, b.includedBy)
to, ok := c.results.takeMatch(c.offset, b.id)
if !ok {
return nil, false
}
var element builder
for _, e := range b.elements {
var option builder
for _, e := range b.options {
if c.results.hasMatchTo(c.offset, e.nodeID(), to) {
element = e
option = e
break
}
}
if element == nil {
if option == nil {
panic("damaged parse result")
}
from := c.offset
n, ok := element.build(c)
n, ok := option.build(c)
if !ok {
panic("damaged parse result")
}

View File

@ -55,7 +55,7 @@ func (s *results) hasMatchTo(offset, id, to int) bool {
return false
}
func (s *results) takeMatch(offset, id int, includedBy *idSet) (int, bool) {
func (s *results) takeMatch(offset, id int) (int, bool) {
if len(s.match) <= offset {
return 0, false
}
@ -81,29 +81,11 @@ func (s *results) takeMatch(offset, id int, includedBy *idSet) (int, bool) {
if found && to-offset > 0 {
s.match[offset][index] = -1
for i := 0; i < len(s.match[offset]); i += 2 {
if includedBy.has(s.match[offset][i]) && s.match[offset][i+1] == to {
s.match[offset][i] = -1
}
}
}
return to, found
}
// takeMatchLength invalidates the first entry recorded at offset whose id and
// end position equal id and to, by overwriting the entry's id slot with -1.
// It does nothing when offset lies beyond the recorded matches or when no such
// entry exists.
func (s *results) takeMatchLength(offset, id, to int) {
if len(s.match) <= offset {
return
}
// s.match[offset] is walked in pairs: index i holds an id and index i+1 the
// end position that belongs to it.
for i := 0; i < len(s.match[offset]); i += 2 {
if s.match[offset][i] == id && s.match[offset][i+1] == to {
// Only the id slot is overwritten; the end position is left in place.
s.match[offset][i] = -1
return
}
}
}
func (s *results) ensureOffset(offset int) {
if len(s.match) > offset {
return

View File

@ -1,38 +1,37 @@
package treerack
type sequenceDefinition struct {
name string
id int
commit CommitType
items []SequenceItem
itemDefs []definition
includedBy []int
ranges [][]int
sbuilder *sequenceBuilder
sparser *sequenceParser
allChars bool
validated bool
initialized bool
name string
id int
commit CommitType
items []SequenceItem
itemDefs []definition
ranges [][]int
generalizations []int
sbuilder *sequenceBuilder
sparser *sequenceParser
allChars bool
validated bool
initialized bool
}
type sequenceParser struct {
name string
id int
commit CommitType
items []parser
ranges [][]int
includedBy []int
allChars bool
name string
id int
commit CommitType
items []parser
ranges [][]int
generalizations []int
allChars bool
}
type sequenceBuilder struct {
name string
id int
commit CommitType
items []builder
ranges [][]int
includedBy *idSet
allChars bool
name string
id int
commit CommitType
items []builder
ranges [][]int
allChars bool
}
func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefinition {
@ -70,20 +69,15 @@ func (d *sequenceDefinition) validate(r *registry) error {
return nil
}
// includeItems reports whether the sequence consists of exactly one item whose
// Max field is 1.
// NOTE(review): Max is presumably the item's upper repetition bound — confirm
// against the SequenceItem declaration.
func (d *sequenceDefinition) includeItems() bool {
return len(d.items) == 1 && d.items[0].Max == 1
}
func (d *sequenceDefinition) ensureBuilder() {
if d.sbuilder != nil {
return
}
d.sbuilder = &sequenceBuilder{
name: d.name,
id: d.id,
commit: d.commit,
includedBy: &idSet{},
name: d.name,
id: d.id,
commit: d.commit,
}
}
@ -117,6 +111,10 @@ func (d *sequenceDefinition) initItems(r *registry) {
d.allChars = allChars
}
// canHaveSpecializations reports whether the sequence consists of exactly one
// item whose Max field is 1.
// NOTE(review): Max is presumably the item's upper repetition bound — confirm
// against the SequenceItem declaration.
func (d *sequenceDefinition) canHaveSpecializations() bool {
return len(d.items) == 1 && d.items[0].Max == 1
}
func (d *sequenceDefinition) init(r *registry) {
if d.initialized {
return
@ -127,32 +125,31 @@ func (d *sequenceDefinition) init(r *registry) {
d.ensureBuilder()
d.sbuilder.ranges = d.ranges
d.initItems(r)
if d.includeItems() {
d.itemDefs[0].setIncludedBy(d.id)
if d.canHaveSpecializations() {
d.itemDefs[0].addGeneralization(d.id)
}
}
func (d *sequenceDefinition) setIncludedBy(includedBy int) {
if intsContain(d.includedBy, includedBy) {
func (d *sequenceDefinition) addGeneralization(g int) {
if intsContain(d.generalizations, g) {
return
}
d.includedBy = append(d.includedBy, includedBy)
d.generalizations = append(d.generalizations, g)
d.ensureBuilder()
d.sbuilder.includedBy.set(includedBy)
if d.includeItems() {
d.itemDefs[0].setIncludedBy(includedBy)
if d.canHaveSpecializations() {
d.itemDefs[0].addGeneralization(g)
}
}
func (d *sequenceDefinition) createParser() {
d.sparser = &sequenceParser{
name: d.name,
id: d.id,
commit: d.commit,
includedBy: d.includedBy,
allChars: d.allChars,
ranges: d.ranges,
name: d.name,
id: d.id,
commit: d.commit,
generalizations: d.generalizations,
allChars: d.allChars,
ranges: d.ranges,
}
}
@ -174,8 +171,9 @@ func (d *sequenceDefinition) parser() parser {
}
// builder returns d.sbuilder, the sequence builder that ensureBuilder assigns.
// NOTE(review): presumably nil if ensureBuilder has not run yet — confirm that
// callers always initialize the definition first.
func (d *sequenceDefinition) builder() builder { return d.sbuilder }
func (p *sequenceParser) nodeName() string { return p.name }
func (p *sequenceParser) nodeID() int { return p.id }
func (p *sequenceParser) nodeName() string { return p.name }
func (p *sequenceParser) nodeID() int { return p.id }
func (p *sequenceParser) parse(c *context) {
if !p.allChars {
@ -194,7 +192,9 @@ func (p *sequenceParser) parse(c *context) {
var parsed bool
for itemIndex < len(p.items) {
// TODO: is it ok to parse before max range check? what if max=0
// TODO:
// - is it ok to parse before max range check? what if max=0
// - validate, normalize and document max=0
p.items[itemIndex].parse(c)
if !c.matchLast {
if currentCount < p.ranges[itemIndex][0] {
@ -219,23 +219,21 @@ func (p *sequenceParser) parse(c *context) {
to = c.offset
// TODO: max cannot be 0
if !parsed || p.ranges[itemIndex][1] >= 0 && currentCount == p.ranges[itemIndex][1] {
itemIndex++
currentCount = 0
}
}
if !p.allChars {
for _, includedBy := range p.includedBy {
if c.pending(from, includedBy) {
c.results.setMatch(from, includedBy, to)
}
for _, g := range p.generalizations {
if c.pending(from, g) {
c.results.setMatch(from, g, to)
}
}
c.results.setMatch(from, p.id, to)
c.success(to)
if !p.allChars {
c.unmarkPending(from, p.id)
}
@ -245,12 +243,11 @@ func (b *sequenceBuilder) nodeName() string { return b.name }
// nodeID returns the numeric id stored in the builder.
func (b *sequenceBuilder) nodeID() int { return b.id }
func (b *sequenceBuilder) build(c *context) ([]*Node, bool) {
to, ok := c.results.takeMatch(c.offset, b.id, b.includedBy)
to, ok := c.results.takeMatch(c.offset, b.id)
if !ok {
return nil, false
}
// maybe something like this:
if to-c.offset == 0 && b.commit&Alias != 0 {
return nil, true
}
@ -290,8 +287,6 @@ func (b *sequenceBuilder) build(c *context) ([]*Node, bool) {
continue
}
// maybe can handle the commit type differently
parsed := c.offset > itemFrom
if parsed || len(n) > 0 {
nodes = append(nodes, n...)

View File

@ -42,7 +42,7 @@ type definition interface {
setID(int)
validate(*registry) error
init(*registry)
setIncludedBy(int)
addGeneralization(int)
parser() parser
builder() builder
}
@ -231,16 +231,16 @@ func (s *Syntax) Sequence(name string, ct CommitType, items ...SequenceItem) err
return s.sequence(name, ct, items...)
}
func (s *Syntax) choice(name string, ct CommitType, elements ...string) error {
return s.register(newChoice(name, ct, elements))
func (s *Syntax) choice(name string, ct CommitType, options ...string) error {
return s.register(newChoice(name, ct, options))
}
func (s *Syntax) Choice(name string, ct CommitType, elements ...string) error {
func (s *Syntax) Choice(name string, ct CommitType, options ...string) error {
if !isValidSymbol(name) {
return ErrInvalidSymbolName
}
return s.choice(name, ct, elements...)
return s.choice(name, ct, options...)
}
func (s *Syntax) Read(r io.Reader) error {

View File

@ -43,10 +43,10 @@ quantity:alias = count-quantifier
item:nows = (terminal | symbol | group) quantity?;
sequence = item+;
element:alias = terminal | symbol | group | sequence;
option:alias = terminal | symbol | group | sequence;
// DOC: how the order matters
choice = element ("|" element)+;
choice = option ("|" option)+;
// DOC: not having 'not' needs some tricks sometimes