From 77c33564271f7039159848b779a7021e35731928 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Thu, 2 Nov 2017 22:19:03 +0100 Subject: [PATCH] refactor parse phase - parsers --- bootsyntax.go | 8 +-- char.go | 22 ++++---- choice.go | 130 ++++++++++++++++++++++++------------------------ results.go | 20 +------- sequence.go | 117 +++++++++++++++++++++---------------------- syntax.go | 10 ++-- syntax.treerack | 4 +- 7 files changed, 141 insertions(+), 170 deletions(-) diff --git a/bootsyntax.go b/bootsyntax.go index 6668c11..f46d5ea 100644 --- a/bootsyntax.go +++ b/bootsyntax.go @@ -204,15 +204,15 @@ var bootSyntaxDefs = [][]string{{ }, { "sequence", "sequence", "none", "item", "items-continue", }, { - "choice", "element", "alias", "terminal", "symbol", "group", "sequence", + "choice", "option", "alias", "terminal", "symbol", "group", "sequence", }, { "chars", "pipe", "alias", "|", }, { - "sequence", "element-continue", "alias", "wscs", "pipe", "wscs", "element", + "sequence", "option-continue", "alias", "wscs", "pipe", "wscs", "option", }, { - "sequence", "elements-continue", "alias", "element-continue:1:-1", + "sequence", "options-continue", "alias", "option-continue:1:-1", }, { - "sequence", "choice", "none", "element", "elements-continue", + "sequence", "choice", "none", "option", "options-continue", }, { "choice", "expression", diff --git a/char.go b/char.go index 95942d3..7804fa9 100644 --- a/char.go +++ b/char.go @@ -1,12 +1,12 @@ package treerack type charParser struct { - name string - id int - not bool - chars []rune - ranges [][]rune - includedBy []int + name string + id int + not bool + chars []rune + ranges [][]rune + generalizations []int } func newChar( @@ -32,12 +32,12 @@ func (p *charParser) setCommitType(ct CommitType) {} func (p *charParser) validate(*registry) error { return nil } func (p *charParser) init(*registry) {} -func (p *charParser) setIncludedBy(includedBy int) { - if intsContain(p.includedBy, includedBy) { +func (p *charParser) addGeneralization(g int) { + if intsContain(p.generalizations, g) { return } - p.includedBy = append(p.includedBy, includedBy) + p.generalizations = append(p.generalizations, g) } func (p *charParser) parser() parser { return p } @@ -69,10 +69,6 @@ func (p *charParser) parse(c *context) { return } - for _, includedBy := range p.includedBy { - c.results.setMatch(c.offset, includedBy, c.offset+1) - } - c.success(c.offset + 1) } diff --git a/choice.go b/choice.go index 3af9157..025dc4a 100644 --- a/choice.go +++ b/choice.go @@ -1,39 +1,38 @@ package treerack type choiceDefinition struct { - name string - id int - commit CommitType - elements []string - elementDefs []definition - includedBy []int - cbuilder *choiceBuilder - cparser *choiceParser - validated bool - initialized bool + name string + id int + commit CommitType + options []string + optionDefs []definition + generalizations []int + cparser *choiceParser + cbuilder *choiceBuilder + validated bool + initialized bool } type choiceParser struct { - name string - id int - commit CommitType - elements []parser - includedBy []int + name string + id int + commit CommitType + options []parser + generalizations []int } type choiceBuilder struct { - name string - id int - commit CommitType - elements []builder - includedBy *idSet + name string + id int + commit CommitType + options []builder } -func newChoice(name string, ct CommitType, elements []string) *choiceDefinition { +func newChoice(name string, ct CommitType, options []string) *choiceDefinition { return &choiceDefinition{ - name: name, - commit: ct, - elements: elements, + name: name, + commit: ct, + options: options, } } @@ -50,10 +49,10 @@ func (d *choiceDefinition) validate(r *registry) error { } d.validated = true - for i := range d.elements { - e, ok := r.definitions[d.elements[i]] + for i := range d.options { + e, ok := r.definitions[d.options[i]] if !ok { - return parserNotFound(d.elements[i]) + return parserNotFound(d.options[i]) } if err := e.validate(r); err != nil { @@ -70,20 +69,19 @@ func (d *choiceDefinition) ensureBuilder() { } d.cbuilder = &choiceBuilder{ - name: d.name, - id: d.id, - commit: d.commit, - includedBy: &idSet{}, + name: d.name, + id: d.id, + commit: d.commit, } } -func (d *choiceDefinition) initElements(r *registry) { - for _, e := range d.elements { +func (d *choiceDefinition) initOptions(r *registry) { + for _, e := range d.options { def := r.definitions[e] - d.elementDefs = append(d.elementDefs, def) + d.optionDefs = append(d.optionDefs, def) def.init(r) - d.cbuilder.elements = append(d.cbuilder.elements, def.builder()) - def.setIncludedBy(d.id) + d.cbuilder.options = append(d.cbuilder.options, def.builder()) + def.addGeneralization(d.id) } } @@ -94,35 +92,34 @@ func (d *choiceDefinition) init(r *registry) { d.initialized = true d.ensureBuilder() - d.initElements(r) + d.initOptions(r) } -func (d *choiceDefinition) setIncludedBy(includedBy int) { - if intsContain(d.includedBy, includedBy) { +func (d *choiceDefinition) addGeneralization(g int) { + if intsContain(d.generalizations, g) { return } - d.includedBy = append(d.includedBy, includedBy) + d.generalizations = append(d.generalizations, g) d.ensureBuilder() - d.cbuilder.includedBy.set(includedBy) - for _, e := range d.elementDefs { - e.setIncludedBy(includedBy) + for _, e := range d.optionDefs { + e.addGeneralization(g) } } func (d *choiceDefinition) createParser() { d.cparser = &choiceParser{ - name: d.name, - id: d.id, - commit: d.commit, - includedBy: d.includedBy, + name: d.name, + id: d.id, + commit: d.commit, + generalizations: d.generalizations, } } -func (d *choiceDefinition) createElementParsers() { - for _, def := range d.elementDefs { - element := def.parser() - d.cparser.elements = append(d.cparser.elements, element) +func (d *choiceDefinition) createOptionParsers() { + for _, def := range d.optionDefs { + option := def.parser() + d.cparser.options = append(d.cparser.options, option) } } @@ -132,13 +129,14 @@ func (d *choiceDefinition) parser() parser { } d.createParser() - d.createElementParsers() + d.createOptionParsers() return d.cparser } func (d *choiceDefinition) builder() builder { return d.cbuilder } -func (p *choiceParser) nodeName() string { return p.name } -func (p *choiceParser) nodeID() int { return p.id } + +func (p *choiceParser) nodeName() string { return p.name } +func (p *choiceParser) nodeID() int { return p.id } func (p *choiceParser) parse(c *context) { if c.fromResults(p.id) { @@ -155,21 +153,21 @@ func (p *choiceParser) parse(c *context) { to := c.offset var match bool - var elementIndex int + var optionIndex int var foundMatch bool for { foundMatch = false - elementIndex = 0 + optionIndex = 0 // TODO: // - avoid double parsing by setting first-from-store in the context, prepare in advance to // know whether it can be it's own item - // - it is also important to figure why disabling the failed elements breaks the parsing + // - it is also important to figure why disabling the failed options breaks the parsing - for elementIndex < len(p.elements) { - p.elements[elementIndex].parse(c) - elementIndex++ + for optionIndex < len(p.options) { + p.options[optionIndex].parse(c) + optionIndex++ if !c.matchLast || match && c.offset <= to { c.offset = from @@ -204,26 +202,26 @@ func (b *choiceBuilder) nodeName() string { return b.name } func (b *choiceBuilder) nodeID() int { return b.id } func (b *choiceBuilder) build(c *context) ([]*Node, bool) { - to, ok := c.results.takeMatch(c.offset, b.id, b.includedBy) + to, ok := c.results.takeMatch(c.offset, b.id) if !ok { return nil, false } - var element builder - for _, e := range b.elements { + var option builder + for _, e := range b.options { if c.results.hasMatchTo(c.offset, e.nodeID(), to) { - element = e + option = e break } } - if element == nil { + if option == nil { panic("damaged parse result") } from := c.offset - n, ok := element.build(c) + n, ok := option.build(c) if !ok { panic("damaged parse result") } diff --git a/results.go b/results.go index ca21875..85c8354 100644 --- a/results.go +++ b/results.go @@ -55,7 +55,7 @@ func (s *results) hasMatchTo(offset, id, to int) bool { return false } -func (s *results) takeMatch(offset, id int, includedBy *idSet) (int, bool) { +func (s *results) takeMatch(offset, id int) (int, bool) { if len(s.match) <= offset { return 0, false } @@ -81,29 +81,11 @@ func (s *results) takeMatch(offset, id int, includedBy *idSet) (int, bool) { if found && to-offset > 0 { s.match[offset][index] = -1 - for i := 0; i < len(s.match[offset]); i += 2 { - if includedBy.has(s.match[offset][i]) && s.match[offset][i+1] == to { - s.match[offset][i] = -1 - } - } } return to, found } -func (s *results) takeMatchLength(offset, id, to int) { - if len(s.match) <= offset { - return - } - - for i := 0; i < len(s.match[offset]); i += 2 { - if s.match[offset][i] == id && s.match[offset][i+1] == to { - s.match[offset][i] = -1 - return - } - } -} - func (s *results) ensureOffset(offset int) { if len(s.match) > offset { return diff --git a/sequence.go b/sequence.go index 4b4f585..e55ceb4 100644 --- a/sequence.go +++ b/sequence.go @@ -1,38 +1,37 @@ package treerack type sequenceDefinition struct { - name string - id int - commit CommitType - items []SequenceItem - itemDefs []definition - includedBy []int - ranges [][]int - sbuilder *sequenceBuilder - sparser *sequenceParser - allChars bool - validated bool - initialized bool + name string + id int + commit CommitType + items []SequenceItem + itemDefs []definition + ranges [][]int + generalizations []int + sbuilder *sequenceBuilder + sparser *sequenceParser + allChars bool + validated bool + initialized bool } type sequenceParser struct { - name string - id int - commit CommitType - items []parser - ranges [][]int - includedBy []int - allChars bool + name string + id int + commit CommitType + items []parser + ranges [][]int + generalizations []int + allChars bool } type sequenceBuilder struct { - name string - id int - commit CommitType - items []builder - ranges [][]int - includedBy *idSet - allChars bool + name string + id int + commit CommitType + items []builder + ranges [][]int + allChars bool } func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefinition { @@ -70,20 +69,15 @@ func (d *sequenceDefinition) validate(r *registry) error { return nil } -func (d *sequenceDefinition) includeItems() bool { - return len(d.items) == 1 && d.items[0].Max == 1 -} - func (d *sequenceDefinition) ensureBuilder() { if d.sbuilder != nil { return } d.sbuilder = &sequenceBuilder{ - name: d.name, - id: d.id, - commit: d.commit, - includedBy: &idSet{}, + name: d.name, + id: d.id, + commit: d.commit, } } @@ -117,6 +111,10 @@ func (d *sequenceDefinition) initItems(r *registry) { d.allChars = allChars } +func (d *sequenceDefinition) canHaveSpecializations() bool { + return len(d.items) == 1 && d.items[0].Max == 1 +} + func (d *sequenceDefinition) init(r *registry) { if d.initialized { return @@ -127,32 +125,31 @@ func (d *sequenceDefinition) init(r *registry) { d.ensureBuilder() d.sbuilder.ranges = d.ranges d.initItems(r) - if d.includeItems() { - d.itemDefs[0].setIncludedBy(d.id) + if d.canHaveSpecializations() { + d.itemDefs[0].addGeneralization(d.id) } } -func (d *sequenceDefinition) setIncludedBy(includedBy int) { - if intsContain(d.includedBy, includedBy) { +func (d *sequenceDefinition) addGeneralization(g int) { + if intsContain(d.generalizations, g) { return } - d.includedBy = append(d.includedBy, includedBy) + d.generalizations = append(d.generalizations, g) d.ensureBuilder() - d.sbuilder.includedBy.set(includedBy) - if d.includeItems() { - d.itemDefs[0].setIncludedBy(includedBy) + if d.canHaveSpecializations() { + d.itemDefs[0].addGeneralization(g) } } func (d *sequenceDefinition) createParser() { d.sparser = &sequenceParser{ - name: d.name, - id: d.id, - commit: d.commit, - includedBy: d.includedBy, - allChars: d.allChars, - ranges: d.ranges, + name: d.name, + id: d.id, + commit: d.commit, + generalizations: d.generalizations, + allChars: d.allChars, + ranges: d.ranges, } } @@ -174,8 +171,9 @@ func (d *sequenceDefinition) parser() parser { } func (d *sequenceDefinition) builder() builder { return d.sbuilder } -func (p *sequenceParser) nodeName() string { return p.name } -func (p *sequenceParser) nodeID() int { return p.id } + +func (p *sequenceParser) nodeName() string { return p.name } +func (p *sequenceParser) nodeID() int { return p.id } func (p *sequenceParser) parse(c *context) { if !p.allChars { @@ -194,7 +192,9 @@ func (p *sequenceParser) parse(c *context) { var parsed bool for itemIndex < len(p.items) { - // TODO: is it ok to parse before max range check? what if max=0 + // TODO: + // - is it ok to parse before max range check? what if max=0 + // - validate, normalize and document max=0 p.items[itemIndex].parse(c) if !c.matchLast { if currentCount < p.ranges[itemIndex][0] { @@ -219,23 +219,21 @@ func (p *sequenceParser) parse(c *context) { to = c.offset + // TODO: max cannot be 0 if !parsed || p.ranges[itemIndex][1] >= 0 && currentCount == p.ranges[itemIndex][1] { itemIndex++ currentCount = 0 } } - if !p.allChars { - for _, includedBy := range p.includedBy { - if c.pending(from, includedBy) { - c.results.setMatch(from, includedBy, to) - } + for _, g := range p.generalizations { + if c.pending(from, g) { + c.results.setMatch(from, g, to) } } c.results.setMatch(from, p.id, to) c.success(to) - if !p.allChars { c.unmarkPending(from, p.id) } @@ -245,12 +243,11 @@ func (b *sequenceBuilder) nodeName() string { return b.name } func (b *sequenceBuilder) nodeID() int { return b.id } func (b *sequenceBuilder) build(c *context) ([]*Node, bool) { - to, ok := c.results.takeMatch(c.offset, b.id, b.includedBy) + to, ok := c.results.takeMatch(c.offset, b.id) if !ok { return nil, false } - // maybe something like this: if to-c.offset == 0 && b.commit&Alias != 0 { return nil, true } @@ -290,8 +287,6 @@ func (b *sequenceBuilder) build(c *context) ([]*Node, bool) { continue } - // maybe can handle the commit type differently - parsed := c.offset > itemFrom if parsed || len(n) > 0 { nodes = append(nodes, n...) diff --git a/syntax.go b/syntax.go index 4c41834..e4ab1c2 100644 --- a/syntax.go +++ b/syntax.go @@ -42,7 +42,7 @@ type definition interface { setID(int) validate(*registry) error init(*registry) - setIncludedBy(int) + addGeneralization(int) parser() parser builder() builder } @@ -231,16 +231,16 @@ func (s *Syntax) Sequence(name string, ct CommitType, items ...SequenceItem) err return s.sequence(name, ct, items...) } -func (s *Syntax) choice(name string, ct CommitType, elements ...string) error { - return s.register(newChoice(name, ct, elements)) +func (s *Syntax) choice(name string, ct CommitType, options ...string) error { + return s.register(newChoice(name, ct, options)) } -func (s *Syntax) Choice(name string, ct CommitType, elements ...string) error { +func (s *Syntax) Choice(name string, ct CommitType, options ...string) error { if !isValidSymbol(name) { return ErrInvalidSymbolName } - return s.choice(name, ct, elements...) + return s.choice(name, ct, options...) } func (s *Syntax) Read(r io.Reader) error { diff --git a/syntax.treerack b/syntax.treerack index 967ec20..3b4e4f6 100644 --- a/syntax.treerack +++ b/syntax.treerack @@ -43,10 +43,10 @@ quantity:alias = count-quantifier item:nows = (terminal | symbol | group) quantity?; sequence = item+; -element:alias = terminal | symbol | group | sequence; +option:alias = terminal | symbol | group | sequence; // DOC: how the order matters -choice = element ("|" element)+; +choice = option ("|" option)+; // DOC: not having 'not' needs some tricks sometimes