refactor init phase - definitions

This commit is contained in:
Arpad Ryszka 2017-11-01 03:54:53 +01:00
parent 52b2bcc751
commit e0f61f9d28
7 changed files with 171 additions and 257 deletions

16
char.go
View File

@ -32,7 +32,7 @@ func (p *charParser) setCommitType(ct CommitType) {}
func (p *charParser) validate(*registry) error { return nil } func (p *charParser) validate(*registry) error { return nil }
func (p *charParser) init(*registry) {} func (p *charParser) init(*registry) {}
func (p *charParser) setIncludedBy(r *registry, includedBy int) { func (p *charParser) setIncludedBy(includedBy int) {
if intsContain(p.includedBy, includedBy) { if intsContain(p.includedBy, includedBy) {
return return
} }
@ -40,18 +40,8 @@ func (p *charParser) setIncludedBy(r *registry, includedBy int) {
p.includedBy = append(p.includedBy, includedBy) p.includedBy = append(p.includedBy, includedBy)
} }
func (p *charParser) parser(r *registry) parser { func (p *charParser) parser() parser { return p }
if _, ok := r.parser(p.name); ok { func (p *charParser) builder() builder { return p }
return p
}
r.setParser(p)
return p
}
// builder returns the char parser itself, which acts as its own builder.
func (p *charParser) builder() builder { return p }
func matchChars(chars []rune, ranges [][]rune, not bool, char rune) bool { func matchChars(chars []rune, ranges [][]rune, not bool, char rune) bool {
for _, ci := range chars { for _, ci := range chars {

117
choice.go
View File

@ -5,8 +5,10 @@ type choiceDefinition struct {
id int id int
commit CommitType commit CommitType
elements []string elements []string
elementDefs []definition
includedBy []int includedBy []int
cbuilder *choiceBuilder cbuilder *choiceBuilder
cparser *choiceParser
validated bool validated bool
initialized bool initialized bool
} }
@ -48,7 +50,6 @@ func (d *choiceDefinition) validate(r *registry) error {
} }
d.validated = true d.validated = true
for i := range d.elements { for i := range d.elements {
e, ok := r.definitions[d.elements[i]] e, ok := r.definitions[d.elements[i]]
if !ok { if !ok {
@ -63,101 +64,79 @@ func (d *choiceDefinition) validate(r *registry) error {
return nil return nil
} }
// ensureBuilder creates the choice builder of the definition on first use.
// When a builder is already stored in d.cbuilder, it is left untouched, so
// the method can be called from every code path that needs the builder.
func (d *choiceDefinition) ensureBuilder() {
	if d.cbuilder == nil {
		b := new(choiceBuilder)
		b.name = d.name
		b.id = d.id
		b.commit = d.commit
		b.includedBy = &idSet{} // starts empty, filled by setIncludedBy
		d.cbuilder = b
	}
}
// initElements resolves every element name of the choice in the registry,
// stores the resolved definitions in d.elementDefs, initializes them, and
// wires their builders into the choice builder. Each element is also marked
// as included by this choice.
//
// It expects d.cbuilder to be set already and does not check whether a name
// is present in the registry.
func (d *choiceDefinition) initElements(r *registry) {
	for _, name := range d.elements {
		element := r.definitions[name]
		d.elementDefs = append(d.elementDefs, element)

		// The element is initialized before its builder is requested.
		element.init(r)

		elementBuilder := element.builder()
		d.cbuilder.elements = append(d.cbuilder.elements, elementBuilder)

		element.setIncludedBy(d.id)
	}
}
func (d *choiceDefinition) init(r *registry) { func (d *choiceDefinition) init(r *registry) {
if d.initialized { if d.initialized {
return return
} }
d.initialized = true d.initialized = true
d.ensureBuilder()
if d.cbuilder == nil { d.initElements(r)
d.cbuilder = &choiceBuilder{
name: d.name,
id: d.id,
commit: d.commit,
includedBy: &idSet{},
}
}
for _, e := range d.elements {
def := r.definitions[e]
d.cbuilder.elements = append(d.cbuilder.elements, def.builder())
def.init(r)
def.setIncludedBy(r, d.id)
}
} }
func (d *choiceDefinition) setIncludedBy(r *registry, includedBy int) { func (d *choiceDefinition) setIncludedBy(includedBy int) {
if intsContain(d.includedBy, includedBy) { if intsContain(d.includedBy, includedBy) {
return return
} }
d.includedBy = append(d.includedBy, includedBy) d.includedBy = append(d.includedBy, includedBy)
d.ensureBuilder()
if d.cbuilder == nil {
d.cbuilder = &choiceBuilder{
name: d.name,
id: d.id,
commit: d.commit,
includedBy: &idSet{},
}
}
d.cbuilder.includedBy.set(includedBy) d.cbuilder.includedBy.set(includedBy)
for _, e := range d.elementDefs {
for _, e := range d.elements { e.setIncludedBy(includedBy)
r.definitions[e].setIncludedBy(r, includedBy)
} }
} }
// TODO: func (d *choiceDefinition) createParser() {
// - it may be possible to initialize the parsers non-recursively d.cparser = &choiceParser{
// - maybe the whole definition, parser and builder can be united
func (d *choiceDefinition) parser(r *registry) parser {
p, ok := r.parser(d.name)
if ok {
return p
}
cp := &choiceParser{
name: d.name, name: d.name,
id: d.id, id: d.id,
commit: d.commit, commit: d.commit,
includedBy: d.includedBy, includedBy: d.includedBy,
} }
r.setParser(cp)
var elements []parser
for _, e := range d.elements {
element, ok := r.parser(e)
if ok {
elements = append(elements, element)
continue
}
element = r.definitions[e].parser(r)
elements = append(elements, element)
}
cp.elements = elements
return cp
} }
func (d *choiceDefinition) builder() builder { func (d *choiceDefinition) createElementParsers() {
if d.cbuilder == nil { for _, def := range d.elementDefs {
d.cbuilder = &choiceBuilder{ element := def.parser()
name: d.name, d.cparser.elements = append(d.cparser.elements, element)
id: d.id,
commit: d.commit,
includedBy: &idSet{},
} }
}
return d.cbuilder
} }
// parser returns the parser of the choice, creating it on the first call and
// returning the stored instance afterwards.
func (d *choiceDefinition) parser() parser {
	if d.cparser == nil {
		// createParser runs first, and only then are the element parsers
		// requested through createElementParsers.
		d.createParser()
		d.createElementParsers()
	}

	return d.cparser
}
// builder returns the choice builder stored in d.cbuilder, which is created
// by ensureBuilder.
func (d *choiceDefinition) builder() builder {
	return d.cbuilder
}
func (p *choiceParser) nodeName() string { return p.name } func (p *choiceParser) nodeName() string { return p.name }
func (p *choiceParser) nodeID() int { return p.id } func (p *choiceParser) nodeID() int { return p.id }

View File

@ -22,6 +22,8 @@ code generation go:
- char matches can be generated into switches - char matches can be generated into switches
code generation js code generation js
documentation flag documentation flag
support custom tokenization
streaming
[problems] [problems]
can the root be an alias? check the commit mechanism can the root be an alias? check the commit mechanism

View File

@ -1,74 +0,0 @@
package treerack
import "fmt"
// definition is the contract shared by the syntax definitions stored in the
// registry. A definition is validated and initialized against the registry,
// and it provides the parser and the builder created from it.
type definition interface {
// nodeName returns the name of the definition; the registry uses it as the
// lookup key. setNodeName changes it.
nodeName() string
setNodeName(string)
// nodeID returns the numeric id of the definition.
nodeID() int
// commitType returns the CommitType flags of the definition, setCommitType
// replaces them.
commitType() CommitType
setCommitType(CommitType)
// setID assigns the numeric id of the definition.
setID(int)
// validate checks the definition against the registry and returns an error
// when it is not valid.
validate(*registry) error
// init prepares the definition using the registry.
init(*registry)
// setIncludedBy records the id of a definition that includes this one.
setIncludedBy(*registry, int)
// parser returns the parser belonging to the definition, using the registry.
parser(*registry) parser
// builder returns the builder belonging to the definition.
builder() builder
}
// parser is the parsing side of a definition.
type parser interface {
// nodeName returns the name of the parser.
nodeName() string
// nodeID returns the numeric id of the parser.
nodeID() int
// parse runs the parser on the context. It has no return value; callers
// inspect the context afterwards (see parse, which reads c.readErr and
// c.match).
parse(*context)
}
// builder is the node building side of a definition.
type builder interface {
// nodeName returns the name of the builder.
nodeName() string
// nodeID returns the numeric id of the builder.
nodeID() int
// build creates nodes from the context. The boolean result reports success;
// the build helper panics when it is false for the root builder.
build(*context) ([]*Node, bool)
}
// parserNotFound creates the error describing that no parser is available
// under the provided name.
func parserNotFound(name string) error {
	const format = "parser not found: %s"
	return fmt.Errorf(format, name)
}
// cannotIncludeParsers creates the error describing that the parser with the
// provided name is not able to include other parsers.
func cannotIncludeParsers(name string) error {
	const format = "parser: %s cannot include other parsers"
	return fmt.Errorf(format, name)
}
// intsContain reports whether i is among the values of is. A nil or empty
// slice contains nothing.
func intsContain(is []int, i int) bool {
	for idx := 0; idx < len(is); idx++ {
		if is[idx] == i {
			return true
		}
	}

	return false
}
// parse runs p on the context c and turns the outcome into an error value.
// The checks happen in a fixed order: a read error recorded on the context
// is returned first, a failed match is reported as ErrInvalidInput, and
// finally the result of c.finalize(p) decides whether an error is returned.
func parse(p parser, c *context) error {
	p.parse(c)

	switch {
	case c.readErr != nil:
		return c.readErr
	case !c.match:
		return ErrInvalidInput
	}

	finalizeErr := c.finalize(p)
	if finalizeErr == nil {
		return nil
	}

	return finalizeErr
}
// build resets the offset of the context to zero and lets b build the nodes
// from it. Exactly one node is expected; when the builder reports failure or
// returns any other number of nodes, build panics with "damaged parse
// result".
func build(b builder, c *context) *Node {
	c.offset = 0

	nodes, ok := b.build(c)
	if ok && len(nodes) == 1 {
		return nodes[0]
	}

	panic("damaged parse result")
}

View File

@ -28,11 +28,6 @@ func (r *registry) definition(name string) (definition, bool) {
return d, ok return d, ok
} }
// parser looks up the parser stored under name. The boolean result reports
// whether an entry exists in r.parsers.
func (r *registry) parser(name string) (parser, bool) {
	found, exists := r.parsers[name]
	return found, exists
}
func (r *registry) setDefinition(d definition) error { func (r *registry) setDefinition(d definition) error {
if _, ok := r.definitions[d.nodeName()]; ok { if _, ok := r.definitions[d.nodeName()]; ok {
return duplicateDefinition(d.nodeName()) return duplicateDefinition(d.nodeName())
@ -48,10 +43,6 @@ func (r *registry) setDefinition(d definition) error {
return nil return nil
} }
// setParser stores p in r.parsers under its node name, replacing any parser
// stored under the same name before.
func (r *registry) setParser(p parser) {
	name := p.nodeName()
	r.parsers[name] = p
}
func (r *registry) getDefinitions() []definition { func (r *registry) getDefinitions() []definition {
var defs []definition var defs []definition
for _, def := range r.definitions { for _, def := range r.definitions {

View File

@ -5,9 +5,11 @@ type sequenceDefinition struct {
id int id int
commit CommitType commit CommitType
items []SequenceItem items []SequenceItem
itemDefs []definition
includedBy []int includedBy []int
ranges [][]int ranges [][]int
sbuilder *sequenceBuilder sbuilder *sequenceBuilder
sparser *sequenceParser
allChars bool allChars bool
validated bool validated bool
initialized bool initialized bool
@ -54,7 +56,6 @@ func (d *sequenceDefinition) validate(r *registry) error {
} }
d.validated = true d.validated = true
for i := range d.items { for i := range d.items {
ii, ok := r.definition(d.items[i].Name) ii, ok := r.definition(d.items[i].Name)
if !ok { if !ok {
@ -70,34 +71,23 @@ func (d *sequenceDefinition) validate(r *registry) error {
} }
func (d *sequenceDefinition) includeItems() bool { func (d *sequenceDefinition) includeItems() bool {
return len(d.items) == 1 && d.items[0].Min == 1 && d.items[0].Max == 1 return len(d.items) == 1 && d.items[0].Max == 1
} }
func (d *sequenceDefinition) init(r *registry) { func (d *sequenceDefinition) ensureBuilder() {
if d.initialized { if d.sbuilder != nil {
return return
} }
d.initialized = true
for i := range d.items {
if d.items[i].Min == 0 && d.items[i].Max == 0 {
d.items[i].Min, d.items[i].Max = 1, 1
} else if d.items[i].Max == 0 {
d.items[i].Max = -1
}
}
if d.sbuilder == nil {
d.sbuilder = &sequenceBuilder{ d.sbuilder = &sequenceBuilder{
name: d.name, name: d.name,
id: d.id, id: d.id,
commit: d.commit, commit: d.commit,
includedBy: &idSet{}, includedBy: &idSet{},
} }
} }
allChars := true func (d *sequenceDefinition) initRanges() {
for _, item := range d.items { for _, item := range d.items {
if item.Min == 0 && item.Max == 0 { if item.Min == 0 && item.Max == 0 {
item.Min, item.Max = 1, 1 item.Min, item.Max = 1, 1
@ -106,101 +96,84 @@ func (d *sequenceDefinition) init(r *registry) {
} }
d.ranges = append(d.ranges, []int{item.Min, item.Max}) d.ranges = append(d.ranges, []int{item.Min, item.Max})
}
}
func (d *sequenceDefinition) initItems(r *registry) {
allChars := true
for _, item := range d.items {
def := r.definitions[item.Name] def := r.definitions[item.Name]
d.itemDefs = append(d.itemDefs, def)
def.init(r)
d.sbuilder.items = append(d.sbuilder.items, def.builder()) d.sbuilder.items = append(d.sbuilder.items, def.builder())
if allChars { if allChars {
if _, isChar := def.(*charParser); !isChar { if _, isChar := def.(*charParser); !isChar {
allChars = false allChars = false
} }
} }
def.init(r)
} }
d.sbuilder.ranges = d.ranges
d.sbuilder.allChars = allChars d.sbuilder.allChars = allChars
d.allChars = allChars d.allChars = allChars
}
if !d.includeItems() { func (d *sequenceDefinition) init(r *registry) {
if d.initialized {
return return
} }
r.definitions[d.items[0].Name].setIncludedBy(r, d.id) d.initialized = true
d.initRanges()
d.ensureBuilder()
d.sbuilder.ranges = d.ranges
d.initItems(r)
if d.includeItems() {
d.itemDefs[0].setIncludedBy(d.id)
}
} }
func (d *sequenceDefinition) setIncludedBy(r *registry, includedBy int) { func (d *sequenceDefinition) setIncludedBy(includedBy int) {
if intsContain(d.includedBy, includedBy) { if intsContain(d.includedBy, includedBy) {
return return
} }
d.includedBy = append(d.includedBy, includedBy) d.includedBy = append(d.includedBy, includedBy)
d.ensureBuilder()
if d.sbuilder == nil {
d.sbuilder = &sequenceBuilder{
name: d.name,
id: d.id,
commit: d.commit,
includedBy: &idSet{},
}
}
d.sbuilder.includedBy.set(includedBy) d.sbuilder.includedBy.set(includedBy)
if d.includeItems() {
if !d.includeItems() { d.itemDefs[0].setIncludedBy(includedBy)
return
} }
r.definitions[d.items[0].Name].setIncludedBy(r, includedBy)
} }
func (d *sequenceDefinition) parser(r *registry) parser { func (d *sequenceDefinition) createParser() {
p, ok := r.parser(d.name) d.sparser = &sequenceParser{
if ok {
return p
}
sp := &sequenceParser{
name: d.name, name: d.name,
id: d.id, id: d.id,
commit: d.commit, commit: d.commit,
includedBy: d.includedBy, includedBy: d.includedBy,
allChars: d.allChars, allChars: d.allChars,
ranges: d.ranges,
} }
r.setParser(sp)
var items []parser
for _, item := range d.items {
pi, ok := r.parser(item.Name)
if ok {
items = append(items, pi)
continue
}
pi = r.definitions[item.Name].parser(r)
items = append(items, pi)
}
sp.items = items
sp.ranges = d.ranges
return sp
} }
func (d *sequenceDefinition) builder() builder { func (d *sequenceDefinition) createItemParsers() {
if d.sbuilder == nil { for _, item := range d.itemDefs {
d.sbuilder = &sequenceBuilder{ pi := item.parser()
name: d.name, d.sparser.items = append(d.sparser.items, pi)
id: d.id,
commit: d.commit,
includedBy: &idSet{},
} }
}
return d.sbuilder
} }
// parser returns the parser of the sequence, creating it on the first call
// and returning the stored instance afterwards.
func (d *sequenceDefinition) parser() parser {
	if d.sparser == nil {
		// createParser runs first, and only then are the item parsers
		// requested through createItemParsers.
		d.createParser()
		d.createItemParsers()
	}

	return d.sparser
}
// builder returns the sequence builder stored in d.sbuilder.
func (d *sequenceDefinition) builder() builder {
	return d.sbuilder
}
func (p *sequenceParser) nodeName() string { return p.name } func (p *sequenceParser) nodeName() string { return p.name }
func (p *sequenceParser) nodeID() int { return p.id } func (p *sequenceParser) nodeID() int { return p.id }

View File

@ -33,6 +33,32 @@ type Syntax struct {
builder builder builder builder
} }
// definition is the contract shared by the syntax definitions stored in the
// registry. A definition is validated and initialized against the registry,
// and after that it provides the parser and the builder created from it.
type definition interface {
// nodeName returns the name of the definition; the registry uses it as the
// lookup key. setNodeName changes it.
nodeName() string
setNodeName(string)
// nodeID returns the numeric id of the definition.
nodeID() int
// commitType returns the CommitType flags of the definition, setCommitType
// replaces them.
commitType() CommitType
setCommitType(CommitType)
// setID assigns the numeric id of the definition.
setID(int)
// validate checks the definition against the registry and returns an error
// when it is not valid.
validate(*registry) error
// init prepares the definition using the registry.
init(*registry)
// setIncludedBy records the id of a definition that includes this one.
setIncludedBy(int)
// parser returns the parser belonging to the definition.
parser() parser
// builder returns the builder belonging to the definition.
builder() builder
}
// parser is the parsing side of a definition.
type parser interface {
// nodeName returns the name of the parser.
nodeName() string
// nodeID returns the numeric id of the parser.
nodeID() int
// parse runs the parser on the context. It has no return value; callers
// inspect the context afterwards (Syntax.Parse reads c.readErr and c.match).
parse(*context)
}
// builder is the node building side of a definition.
type builder interface {
// nodeName returns the name of the builder.
nodeName() string
// nodeID returns the numeric id of the builder.
nodeID() int
// build creates nodes from the context. The boolean result reports success;
// Syntax.Parse panics when it is false for the root builder.
build(*context) ([]*Node, bool)
}
var ( var (
ErrSyntaxInitialized = errors.New("syntax initialized") ErrSyntaxInitialized = errors.New("syntax initialized")
ErrInitFailed = errors.New("init failed") ErrInitFailed = errors.New("init failed")
@ -49,6 +75,14 @@ var (
ErrInvalidSymbolName = errors.New("invalid symbol name") ErrInvalidSymbolName = errors.New("invalid symbol name")
) )
// duplicateDefinition creates the error describing that a definition with
// the provided name has been registered more than once.
func duplicateDefinition(name string) error {
	const format = "duplicate definition: %s"
	return fmt.Errorf(format, name)
}
// parserNotFound creates the error describing that no parser is available
// under the provided name.
func parserNotFound(name string) error {
	const format = "parser not found: %s"
	return fmt.Errorf(format, name)
}
const symbolChars = "^\\\\ \\n\\t\\b\\f\\r\\v/.\\[\\]\\\"{}\\^+*?|():=;" const symbolChars = "^\\\\ \\n\\t\\b\\f\\r\\v/.\\[\\]\\\"{}\\^+*?|():=;"
func parseSymbolChars(c []rune) []rune { func parseSymbolChars(c []rune) []rune {
@ -62,10 +96,6 @@ func parseSymbolChars(c []rune) []rune {
var symbolCharRunes = parseSymbolChars([]rune(symbolChars)) var symbolCharRunes = parseSymbolChars([]rune(symbolChars))
// duplicateDefinition creates the error describing that a definition with
// the provided name has been registered more than once.
func duplicateDefinition(name string) error {
	const format = "duplicate definition: %s"
	return fmt.Errorf(format, name)
}
func isValidSymbol(n string) bool { func isValidSymbol(n string) bool {
runes := []rune(n) runes := []rune(n)
for _, r := range runes { for _, r := range runes {
@ -78,6 +108,16 @@ func isValidSymbol(n string) bool {
} }
// intsContain reports whether i is among the values of is. A nil or empty
// slice contains nothing.
func intsContain(is []int, i int) bool {
	for idx := 0; idx < len(is); idx++ {
		if is[idx] == i {
			return true
		}
	}

	return false
}
func (s *Syntax) applyRoot(d definition) error { func (s *Syntax) applyRoot(d definition) error {
explicitRoot := d.commitType()&Root != 0 explicitRoot := d.commitType()&Root != 0
if explicitRoot && s.explicitRoot { if explicitRoot && s.explicitRoot {
@ -247,7 +287,7 @@ func (s *Syntax) Init() error {
} }
s.root.init(s.registry) s.root.init(s.registry)
s.parser = s.root.parser(s.registry) s.parser = s.root.parser()
s.builder = s.root.builder() s.builder = s.root.builder()
s.initialized = true s.initialized = true
@ -262,17 +302,30 @@ func (s *Syntax) Generate(w io.Writer) error {
return ErrNotImplemented return ErrNotImplemented
} }
// TODO: optimize top sequences to save memory, or just support streaming, or combine the two
func (s *Syntax) Parse(r io.Reader) (*Node, error) { func (s *Syntax) Parse(r io.Reader) (*Node, error) {
if err := s.Init(); err != nil { if err := s.Init(); err != nil {
return nil, err return nil, err
} }
c := newContext(bufio.NewReader(r)) c := newContext(bufio.NewReader(r))
if err := parse(s.parser, c); err != nil { s.parser.parse(c)
if c.readErr != nil {
return nil, c.readErr
}
if !c.match {
return nil, ErrInvalidInput
}
if err := c.finalize(s.parser); err != nil {
return nil, err return nil, err
} }
return build(s.builder, c), nil c.offset = 0
n, ok := s.builder.build(c)
if !ok || len(n) != 1 {
panic("damaged parse result")
}
return n[0], nil
} }