diff --git a/char.go b/char.go index d725b11..2b030fb 100644 --- a/char.go +++ b/char.go @@ -32,7 +32,7 @@ func (p *charParser) setCommitType(ct CommitType) {} func (p *charParser) validate(*registry) error { return nil } func (p *charParser) init(*registry) {} -func (p *charParser) setIncludedBy(r *registry, includedBy int) { +func (p *charParser) setIncludedBy(includedBy int) { if intsContain(p.includedBy, includedBy) { return } @@ -40,18 +40,8 @@ func (p *charParser) setIncludedBy(r *registry, includedBy int) { p.includedBy = append(p.includedBy, includedBy) } -func (p *charParser) parser(r *registry) parser { - if _, ok := r.parser(p.name); ok { - return p - } - - r.setParser(p) - return p -} - -func (p *charParser) builder() builder { - return p -} +func (p *charParser) parser() parser { return p } +func (p *charParser) builder() builder { return p } func matchChars(chars []rune, ranges [][]rune, not bool, char rune) bool { for _, ci := range chars { diff --git a/choice.go b/choice.go index f367ca5..05ba775 100644 --- a/choice.go +++ b/choice.go @@ -5,8 +5,10 @@ type choiceDefinition struct { id int commit CommitType elements []string + elementDefs []definition includedBy []int cbuilder *choiceBuilder + cparser *choiceParser validated bool initialized bool } @@ -48,7 +50,6 @@ func (d *choiceDefinition) validate(r *registry) error { } d.validated = true - for i := range d.elements { e, ok := r.definitions[d.elements[i]] if !ok { @@ -63,103 +64,81 @@ func (d *choiceDefinition) validate(r *registry) error { return nil } +func (d *choiceDefinition) ensureBuilder() { + if d.cbuilder != nil { + return + } + + d.cbuilder = &choiceBuilder{ + name: d.name, + id: d.id, + commit: d.commit, + includedBy: &idSet{}, + } +} + +func (d *choiceDefinition) initElements(r *registry) { + for _, e := range d.elements { + def := r.definitions[e] + d.elementDefs = append(d.elementDefs, def) + def.init(r) + d.cbuilder.elements = append(d.cbuilder.elements, def.builder()) + def.setIncludedBy(d.id) + } +} + func (d *choiceDefinition) init(r *registry) { if d.initialized { return } d.initialized = true - - if d.cbuilder == nil { - d.cbuilder = &choiceBuilder{ - name: d.name, - id: d.id, - commit: d.commit, - includedBy: &idSet{}, - } - } - - for _, e := range d.elements { - def := r.definitions[e] - d.cbuilder.elements = append(d.cbuilder.elements, def.builder()) - def.init(r) - def.setIncludedBy(r, d.id) - } + d.ensureBuilder() + d.initElements(r) } -func (d *choiceDefinition) setIncludedBy(r *registry, includedBy int) { +func (d *choiceDefinition) setIncludedBy(includedBy int) { if intsContain(d.includedBy, includedBy) { return } d.includedBy = append(d.includedBy, includedBy) - - if d.cbuilder == nil { - d.cbuilder = &choiceBuilder{ - name: d.name, - id: d.id, - commit: d.commit, - includedBy: &idSet{}, - } - } - + d.ensureBuilder() d.cbuilder.includedBy.set(includedBy) - - for _, e := range d.elements { - r.definitions[e].setIncludedBy(r, includedBy) + for _, e := range d.elementDefs { + e.setIncludedBy(includedBy) } } -// TODO: -// - it may be possible to initialize the parsers non-recursively -// - maybe the whole definition, parser and builder can be united - -func (d *choiceDefinition) parser(r *registry) parser { - p, ok := r.parser(d.name) - if ok { - return p - } - - cp := &choiceParser{ +func (d *choiceDefinition) createParser() { + d.cparser = &choiceParser{ name: d.name, id: d.id, commit: d.commit, includedBy: d.includedBy, } - - r.setParser(cp) - - var elements []parser - for _, e := range d.elements { - element, ok := r.parser(e) - if ok { - elements = append(elements, element) - continue - } - - element = r.definitions[e].parser(r) - elements = append(elements, element) - } - - cp.elements = elements - return cp } -func (d *choiceDefinition) builder() builder { - if d.cbuilder == nil { - d.cbuilder = &choiceBuilder{ - name: d.name, - id: d.id, - commit: d.commit, - includedBy: &idSet{}, - } +func (d *choiceDefinition) createElementParsers() { + for _, def := range d.elementDefs { + element := def.parser() + d.cparser.elements = append(d.cparser.elements, element) } - - return d.cbuilder } -func (p *choiceParser) nodeName() string { return p.name } -func (p *choiceParser) nodeID() int { return p.id } +func (d *choiceDefinition) parser() parser { + if d.cparser != nil { + return d.cparser + } + + d.createParser() + d.createElementParsers() + return d.cparser +} + +func (d *choiceDefinition) builder() builder { return d.cbuilder } +func (p *choiceParser) nodeName() string { return p.name } +func (p *choiceParser) nodeID() int { return p.id } func (p *choiceParser) parse(c *context) { if c.fromStore(p.id) { diff --git a/notes.txt b/notes.txt index da0e080..ab3f664 100644 --- a/notes.txt +++ b/notes.txt @@ -22,6 +22,8 @@ code generation go: - char matches can be generated into switches code generation js documentation flag +support custom tokenization +streaming [problems] can the root be an alias? check the commit mechanism diff --git a/parse.go b/parse.go deleted file mode 100644 index 01a41d2..0000000 --- a/parse.go +++ /dev/null @@ -1,74 +0,0 @@ -package treerack - -import "fmt" - -type definition interface { - nodeName() string - setNodeName(string) - nodeID() int - commitType() CommitType - setCommitType(CommitType) - setID(int) - validate(*registry) error - init(*registry) - setIncludedBy(*registry, int) - parser(*registry) parser - builder() builder -} - -type parser interface { - nodeName() string - nodeID() int - parse(*context) -} - -type builder interface { - nodeName() string - nodeID() int - build(*context) ([]*Node, bool) -} - -func parserNotFound(name string) error { - return fmt.Errorf("parser not found: %s", name) -} - -func cannotIncludeParsers(name string) error { - return fmt.Errorf("parser: %s cannot include other parsers", name) -} - -func intsContain(is []int, i int) bool { - for _, ii := range is { - if ii == i { - return true - } - } - - return false -} - -func parse(p parser, c *context) error { - p.parse(c) - if c.readErr != nil { - return c.readErr - } - - if !c.match { - return ErrInvalidInput - } - - if err := c.finalize(p); err != nil { - return err - } - - return nil -} - -func build(b builder, c *context) *Node { - c.offset = 0 - n, ok := b.build(c) - if !ok || len(n) != 1 { - panic("damaged parse result") - } - - return n[0] -} diff --git a/registry.go b/registry.go index c6f0fbd..eee1910 100644 --- a/registry.go +++ b/registry.go @@ -28,11 +28,6 @@ func (r *registry) definition(name string) (definition, bool) { return d, ok } -func (r *registry) parser(name string) (parser, bool) { - p, ok := r.parsers[name] - return p, ok -} - func (r *registry) setDefinition(d definition) error { if _, ok := r.definitions[d.nodeName()]; ok { return duplicateDefinition(d.nodeName()) @@ -48,10 +43,6 @@ func (r *registry) setDefinition(d definition) error { return nil } -func (r *registry) setParser(p parser) { - r.parsers[p.nodeName()] = p -} - func (r *registry) getDefinitions() []definition { var defs []definition for _, def := range r.definitions { diff --git a/sequence.go b/sequence.go index 30246ba..a1d49f3 100644 --- a/sequence.go +++ b/sequence.go @@ -5,9 +5,11 @@ type sequenceDefinition struct { id int commit CommitType items []SequenceItem + itemDefs []definition includedBy []int ranges [][]int sbuilder *sequenceBuilder + sparser *sequenceParser allChars bool validated bool initialized bool @@ -54,7 +56,6 @@ func (d *sequenceDefinition) validate(r *registry) error { } d.validated = true - for i := range d.items { ii, ok := r.definition(d.items[i].Name) if !ok { @@ -70,34 +71,23 @@ func (d *sequenceDefinition) validate(r *registry) error { } func (d *sequenceDefinition) includeItems() bool { - return len(d.items) == 1 && d.items[0].Min == 1 && d.items[0].Max == 1 + return len(d.items) == 1 && d.items[0].Max == 1 } -func (d *sequenceDefinition) init(r *registry) { - if d.initialized { +func (d *sequenceDefinition) ensureBuilder() { + if d.sbuilder != nil { return } - d.initialized = true - - for i := range d.items { - if d.items[i].Min == 0 && d.items[i].Max == 0 { - d.items[i].Min, d.items[i].Max = 1, 1 - } else if d.items[i].Max == 0 { - d.items[i].Max = -1 - } + d.sbuilder = &sequenceBuilder{ + name: d.name, + id: d.id, + commit: d.commit, + includedBy: &idSet{}, } +} - if d.sbuilder == nil { - d.sbuilder = &sequenceBuilder{ - name: d.name, - id: d.id, - commit: d.commit, - includedBy: &idSet{}, - } - } - - allChars := true +func (d *sequenceDefinition) initRanges() { for _, item := range d.items { if item.Min == 0 && item.Max == 0 { item.Min, item.Max = 1, 1 @@ -106,103 +96,86 @@ func (d *sequenceDefinition) init(r *registry) { } d.ranges = append(d.ranges, []int{item.Min, item.Max}) + } +} +func (d *sequenceDefinition) initItems(r *registry) { + allChars := true + for _, item := range d.items { def := r.definitions[item.Name] + d.itemDefs = append(d.itemDefs, def) + def.init(r) d.sbuilder.items = append(d.sbuilder.items, def.builder()) - if allChars { if _, isChar := def.(*charParser); !isChar { allChars = false } } - - def.init(r) } - d.sbuilder.ranges = d.ranges d.sbuilder.allChars = allChars d.allChars = allChars +} - if !d.includeItems() { +func (d *sequenceDefinition) init(r *registry) { + if d.initialized { return } - r.definitions[d.items[0].Name].setIncludedBy(r, d.id) + d.initialized = true + d.initRanges() + d.ensureBuilder() + d.sbuilder.ranges = d.ranges + d.initItems(r) + if d.includeItems() { + d.itemDefs[0].setIncludedBy(d.id) + } } -func (d *sequenceDefinition) setIncludedBy(r *registry, includedBy int) { +func (d *sequenceDefinition) setIncludedBy(includedBy int) { if intsContain(d.includedBy, includedBy) { return } d.includedBy = append(d.includedBy, includedBy) - - if d.sbuilder == nil { - d.sbuilder = &sequenceBuilder{ - name: d.name, - id: d.id, - commit: d.commit, - includedBy: &idSet{}, - } - } - + d.ensureBuilder() d.sbuilder.includedBy.set(includedBy) - - if !d.includeItems() { - return + if d.includeItems() { + d.itemDefs[0].setIncludedBy(includedBy) } - - r.definitions[d.items[0].Name].setIncludedBy(r, includedBy) } -func (d *sequenceDefinition) parser(r *registry) parser { - p, ok := r.parser(d.name) - if ok { - return p - } - - sp := &sequenceParser{ +func (d *sequenceDefinition) createParser() { + d.sparser = &sequenceParser{ name: d.name, id: d.id, commit: d.commit, includedBy: d.includedBy, allChars: d.allChars, + ranges: d.ranges, } - - r.setParser(sp) - - var items []parser - for _, item := range d.items { - pi, ok := r.parser(item.Name) - if ok { - items = append(items, pi) - continue - } - - pi = r.definitions[item.Name].parser(r) - items = append(items, pi) - } - - sp.items = items - sp.ranges = d.ranges - return sp } -func (d *sequenceDefinition) builder() builder { - if d.sbuilder == nil { - d.sbuilder = &sequenceBuilder{ - name: d.name, - id: d.id, - commit: d.commit, - includedBy: &idSet{}, - } +func (d *sequenceDefinition) createItemParsers() { + for _, item := range d.itemDefs { + pi := item.parser() + d.sparser.items = append(d.sparser.items, pi) } - - return d.sbuilder } -func (p *sequenceParser) nodeName() string { return p.name } -func (p *sequenceParser) nodeID() int { return p.id } +func (d *sequenceDefinition) parser() parser { + if d.sparser != nil { + return d.sparser + } + + d.createParser() + d.createItemParsers() + return d.sparser +} + +func (d *sequenceDefinition) builder() builder { return d.sbuilder } +func (p *sequenceParser) nodeName() string { return p.name } +func (p *sequenceParser) nodeID() int { return p.id } func (p *sequenceParser) parse(c *context) { if !p.allChars { diff --git a/syntax.go b/syntax.go index 10dbf89..f7e92e4 100644 --- a/syntax.go +++ b/syntax.go @@ -33,6 +33,32 @@ type Syntax struct { builder builder } +type definition interface { + nodeName() string + setNodeName(string) + nodeID() int + commitType() CommitType + setCommitType(CommitType) + setID(int) + validate(*registry) error + init(*registry) + setIncludedBy(int) + parser() parser + builder() builder +} + +type parser interface { + nodeName() string + nodeID() int + parse(*context) +} + +type builder interface { + nodeName() string + nodeID() int + build(*context) ([]*Node, bool) +} + var ( ErrSyntaxInitialized = errors.New("syntax initialized") ErrInitFailed = errors.New("init failed") @@ -49,6 +75,14 @@ var ( ErrInvalidSymbolName = errors.New("invalid symbol name") ) +func duplicateDefinition(name string) error { + return fmt.Errorf("duplicate definition: %s", name) +} + +func parserNotFound(name string) error { + return fmt.Errorf("parser not found: %s", name) +} + const symbolChars = "^\\\\ \\n\\t\\b\\f\\r\\v/.\\[\\]\\\"{}\\^+*?|():=;" func parseSymbolChars(c []rune) []rune { @@ -62,10 +96,6 @@ func parseSymbolChars(c []rune) []rune { var symbolCharRunes = parseSymbolChars([]rune(symbolChars)) -func duplicateDefinition(name string) error { - return fmt.Errorf("duplicate definition: %s", name) -} - func isValidSymbol(n string) bool { runes := []rune(n) for _, r := range runes { @@ -78,6 +108,16 @@ func isValidSymbol(n string) bool { } +func intsContain(is []int, i int) bool { + for _, ii := range is { + if ii == i { + return true + } + } + + return false +} + func (s *Syntax) applyRoot(d definition) error { explicitRoot := d.commitType()&Root != 0 if explicitRoot && s.explicitRoot { @@ -247,7 +287,7 @@ func (s *Syntax) Init() error { } s.root.init(s.registry) - s.parser = s.root.parser(s.registry) + s.parser = s.root.parser() s.builder = s.root.builder() s.initialized = true @@ -262,17 +302,30 @@ func (s *Syntax) Generate(w io.Writer) error { return ErrNotImplemented } -// TODO: optimize top sequences to save memory, or just support streaming, or combine the two - func (s *Syntax) Parse(r io.Reader) (*Node, error) { if err := s.Init(); err != nil { return nil, err } c := newContext(bufio.NewReader(r)) - if err := parse(s.parser, c); err != nil { + s.parser.parse(c) + if c.readErr != nil { + return nil, c.readErr + } + + if !c.match { + return nil, ErrInvalidInput + } + + if err := c.finalize(s.parser); err != nil { return nil, err } - return build(s.builder, c), nil + c.offset = 0 + n, ok := s.builder.build(c) + if !ok || len(n) != 1 { + panic("damaged parse result") + } + + return n[0], nil }