From 2db706b1d18aabab906a00a58f0ab5712cab7041 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Mon, 17 Jul 2017 04:23:29 +0200 Subject: [PATCH 01/15] remove node commit --- boot_test.go | 96 ++++++++++++++++++++++++++++----------------------- char.go | 35 ++++--------------- choice.go | 52 +++++++++++----------------- context.go | 17 ++++----- json.parser | 14 ++++---- notes.txt | 5 ++- parse.go | 19 +++++----- sequence.go | 72 ++++++++++++-------------------------- store.go | 34 +++++++++--------- syntax.parser | 2 +- test.mml | 8 ++--- 11 files changed, 150 insertions(+), 204 deletions(-) diff --git a/boot_test.go b/boot_test.go index f92f347..3d24635 100644 --- a/boot_test.go +++ b/boot_test.go @@ -3,6 +3,7 @@ package treerack import ( "os" "testing" + "time" ) func TestBoot(t *testing.T) { @@ -12,7 +13,7 @@ func TestBoot(t *testing.T) { return } - f, err := os.Open("syntax.parser") + f, err := os.Open("mml.parser") if err != nil { t.Error(err) return @@ -20,58 +21,65 @@ func TestBoot(t *testing.T) { defer f.Close() - n0, err := b.Parse(f) - if err != nil { + start := time.Now() + _, err = b.Parse(f) + t.Log("duration:", time.Now().Sub(start)) + + if err != ErrNotImplemented { t.Error(err) - return } - s0 := NewSyntax() - if err := define(s0, n0); err != nil { - t.Error(err) - return - } + // if err != nil { + // t.Error(err) + // return + // } - _, err = f.Seek(0, 0) - if err != nil { - t.Error(err) - return - } + // s0 := NewSyntax() + // if err := define(s0, n0); err != nil { + // t.Error(err) + // return + // } - err = s0.Init() - if err != nil { - t.Error(err) - return - } + // _, err = f.Seek(0, 0) + // if err != nil { + // t.Error(err) + // return + // } - n1, err := s0.Parse(f) - if err != nil { - t.Error(err) - return - } + // err = s0.Init() + // if err != nil { + // t.Error(err) + // return + // } - checkNode(t, n1, n0) - if t.Failed() { - return - } + // n1, err := s0.Parse(f) + // if err != nil { + // t.Error(err) + // return + // } - s1 := NewSyntax() - if err := define(s1, n1); err != nil { - t.Error(err) - return - } + // checkNode(t, n1, n0) + // if t.Failed() { + // return + // } - _, err = f.Seek(0, 0) - if err != nil { - t.Error(err) - return - } + // s1 := NewSyntax() + // if err := define(s1, n1); err != nil { + // t.Error(err) + // return + // } - n2, err := s1.Parse(f) - if err != nil { - t.Error(err) - return - } + // _, err = f.Seek(0, 0) + // if err != nil { + // t.Error(err) + // return + // } - checkNode(t, n2, n1) + // n2, err := s1.Parse(f) + // if err != nil { + // t.Error(err) + // return + // } + + // checkNode(t, n2, n1) } diff --git a/char.go b/char.go index db4a0e7..2a4099a 100644 --- a/char.go +++ b/char.go @@ -55,7 +55,7 @@ func (p *charParser) setIncludedBy(includedBy parser, parsers *idSet) { p.includedBy = append(p.includedBy, includedBy) } -func (p *charParser) storeIncluded(*context, *Node) { +func (p *charParser) storeIncluded(*context, int, int) { panic(cannotIncludeParsers(p.name)) } @@ -76,34 +76,13 @@ func (p *charParser) match(t rune) bool { } func (p *charParser) parse(t Trace, c *context) { - // t = t.Extend(p.name) - // t.Out1("parsing char", c.offset) - - // if p.commit&Documentation != 0 { - // // t.Out1("fail, doc") - // c.fail(c.offset) - // return - // } - - // if _, ok := c.fromStore(p.id); ok { - // // t.Out1("found in store, match:", m) - // return - // } - - if tok, ok := c.token(); ok && p.match(tok) { - // t.Out1("success", string(tok)) - // n := newNode(p.name, p.id, c.offset, c.offset+1, p.commit) - // c.store.set(c.offset, p.id, n) - // for _, includedBy := range p.includedBy { - // includedBy.storeIncluded(c, n) - // } - - c.successChar() - return - } else { - // t.Out1("fail", string(tok)) - // c.store.set(c.offset, p.id, nil) + if tok, ok := c.token(); !ok || !p.match(tok) { c.fail(c.offset) return } + + c.success(c.offset + 1) + for _, includedBy := range p.includedBy { + includedBy.storeIncluded(c, c.offset, c.offset + 1) + } } diff --git a/choice.go b/choice.go index 522e9e3..906af4d 100644 --- a/choice.go +++ b/choice.go @@ -85,68 +85,60 @@ func (p *choiceParser) setIncludedBy(includedBy parser, parsers *idSet) { p.includedBy = append(p.includedBy, includedBy) } -func (p *choiceParser) storeIncluded(c *context, n *Node) { - if !c.excluded(n.From, p.id) { +func (p *choiceParser) storeIncluded(c *context, from, to int) { + if !c.excluded(from, p.id) { return } - nc := newNode(p.name, p.id, n.From, n.To, p.commit) - nc.append(n) - c.store.set(nc.From, p.id, nc) + c.store.set(from, p.id, true, to) for _, includedBy := range p.includedBy { - includedBy.storeIncluded(c, nc) + includedBy.storeIncluded(c, from, to) } } func (p *choiceParser) parse(t Trace, c *context) { - // t = t.Extend(p.name) - // t.Out1("parsing choice", c.offset) - if p.commit&Documentation != 0 { - // t.Out1("fail, doc") c.fail(c.offset) return } if _, ok := c.fromStore(p.id); ok { - // t.Out1("found in store, match:", m) return } if c.excluded(c.offset, p.id) { - // t.Out1("excluded") c.fail(c.offset) return } c.exclude(c.offset, p.id) - initialOffset := c.offset + from := c.offset + to := c.offset - node := newNode(p.name, p.id, c.offset, c.offset, p.commit) var match bool for { - elements := p.elements + elementIndex := 0 var foundMatch bool - for len(elements) > 0 { - elements[0].parse(t, c) - elements = elements[1:] - c.offset = node.From + for elementIndex < len(p.elements) { + p.elements[elementIndex].parse(t, c) + elementIndex++ + nextTo := c.offset + c.offset = from - if !c.match || match && c.node.tokenLength() <= node.tokenLength() { + if !c.match || match && nextTo <= to { continue } match = true foundMatch = true - node = newNode(p.name, p.id, c.offset, c.offset, p.commit) - node.append(c.node) + to = nextTo - c.store.set(node.From, p.id, node) + c.store.set(from, p.id, true, to) for _, includedBy := range p.includedBy { - includedBy.storeIncluded(c, node) + includedBy.storeIncluded(c, from, to) } } @@ -156,14 +148,12 @@ func (p *choiceParser) parse(t Trace, c *context) { } if match { - // t.Out1("choice, success") - c.success(node) - c.include(initialOffset, p.id) + c.success(to) + c.include(from, p.id) return } - // t.Out1("fail") - c.store.set(node.From, p.id, nil) - c.fail(node.From) - c.include(initialOffset, p.id) + c.store.set(from, p.id, false, 0) + c.fail(from) + c.include(from, p.id) } diff --git a/context.go b/context.go index fb00be3..9c8f30b 100644 --- a/context.go +++ b/context.go @@ -105,13 +105,13 @@ func (c *context) include(offset int, id int) { } func (c *context) fromStore(id int) (bool, bool) { - n, m, ok := c.store.get(c.offset, id) + to, m, ok := c.store.get(c.offset, id) if !ok { return false, false } if m { - c.success(n) + c.success(to) } else { c.fail(c.offset) } @@ -119,15 +119,8 @@ func (c *context) fromStore(id int) (bool, bool) { return m, true } -func (c *context) success(n *Node) { - c.node = n - c.offset = n.To - c.match = true -} - -func (c *context) successChar() { - c.node = nil - c.offset++ +func (c *context) success(to int) { + c.offset = to c.match = true } @@ -137,6 +130,8 @@ func (c *context) fail(offset int) { } func (c *context) finalize() error { + return ErrNotImplemented + if c.node.To < c.readOffset { return ErrUnexpectedCharacter } diff --git a/json.parser b/json.parser index ade4df9..c7d40e8 100644 --- a/json.parser +++ b/json.parser @@ -1,12 +1,12 @@ // JSON (http://www.json.org) -ws:alias = [ \b\f\n\r\t]; +ws:ws = [ \b\f\n\r\t]; true = "true"; false = "false"; null = "null"; -string = "\"" ([^\\"\b\f\n\r\t] | "\\" (["\\/bfnrt] | "u" [0-9a-f]{4}))* "\""; -number = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? [0-9]+)?; -entry = string ws* ":" ws* value; -object = "{" ws* (entry (ws* "," ws* entry)*)? ws* "}"; -array = "[" ws* (value (ws* "," ws* value)*)? ws* "]"; +string:nows = "\"" ([^\\"\b\f\n\r\t] | "\\" (["\\/bfnrt] | "u" [0-9a-f]{4}))* "\""; +number:nows = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? [0-9]+)?; +entry = string ":" value; +object = "{" (entry ("," entry)*)? "}"; +array = "[" (value ("," value)*)? "]"; value:alias = true | false | null | string | number | object | array; -json = value; +json:root = value; diff --git a/notes.txt b/notes.txt index 4f80dcf..265eff0 100644 --- a/notes.txt +++ b/notes.txt @@ -15,8 +15,11 @@ coverage custom tokens indentation streaming -code generation go +code generation go: +- find things that depend on the syntax input +- char matches can be generated into switches code generation js +ws and nows flags [problems] can the root be an alias? check the commit mechanism diff --git a/parse.go b/parse.go index ccdee12..e4bbc5c 100644 --- a/parse.go +++ b/parse.go @@ -8,16 +8,23 @@ type definition interface { setID(int) parser(*registry, *idSet) (parser, error) commitType() CommitType + // builder() builder } type parser interface { nodeName() string nodeID() int setIncludedBy(parser, *idSet) - storeIncluded(*context, *Node) + storeIncluded(*context, int, int) parse(Trace, *context) } +type builder interface { + nodeName() string + nodeID() int + build(*context) *Node +} + func parserNotFound(name string) error { return fmt.Errorf("parser not found: %s", name) } @@ -26,16 +33,6 @@ func cannotIncludeParsers(name string) error { return fmt.Errorf("parser: %s cannot include other parsers", name) } -func stringsContainDeprecated(ss []string, s string) bool { - for _, si := range ss { - if si == s { - return true - } - } - - return false -} - func parse(t Trace, p parser, c *context) (*Node, error) { p.parse(t, c) if c.readErr != nil { diff --git a/sequence.go b/sequence.go index de54591..ab54ec3 100644 --- a/sequence.go +++ b/sequence.go @@ -106,93 +106,67 @@ func (p *sequenceParser) setIncludedBy(includedBy parser, parsers *idSet) { p.includedBy = append(p.includedBy, includedBy) } -func (p *sequenceParser) storeIncluded(c *context, n *Node) { - if !c.excluded(n.From, p.id) { +func (p *sequenceParser) storeIncluded(c *context, from, to int) { + if !c.excluded(from, p.id) { return } - nc := newNode(p.name, p.id, n.From, n.To, p.commit) - nc.append(n) - c.store.set(nc.From, p.id, nc) + c.store.set(from, p.id, true, to) for _, includedBy := range p.includedBy { - includedBy.storeIncluded(c, nc) + includedBy.storeIncluded(c, from, to) } } func (p *sequenceParser) parse(t Trace, c *context) { - // t = t.Extend(p.name) - // t.Out1("parsing sequence", c.offset) - if p.commit&Documentation != 0 { - // t.Out1("fail, doc") c.fail(c.offset) return } if c.excluded(c.offset, p.id) { - // t.Out1("excluded") c.fail(c.offset) return } c.exclude(c.offset, p.id) - initialOffset := c.offset - items := p.items - ranges := p.ranges + itemIndex := 0 var currentCount int - node := newNode(p.name, p.id, c.offset, c.offset, p.commit) + from := c.offset + to := c.offset - for len(items) > 0 { - var m bool - // var ok bool - // m, ok = c.fromStore(items[0].nodeID()) - // if ok { - // // t.Out1("sequence item found in store, match:", m, items[0].nodeName(), c.offset) - // } else { - items[0].parse(t, c) - m = c.match - // } - - if !m { - if currentCount < ranges[0][0] { - // t.Out1("fail, item failed") - // c.store.set(node.From, p.id, nil) - c.fail(node.From) - c.include(initialOffset, p.id) + for itemIndex < len(p.items) { + p.items[itemIndex].parse(t, c) + if !c.match { + if currentCount < p.ranges[itemIndex][0] { + c.fail(from) + c.include(from, p.id) return } - items = items[1:] - ranges = ranges[1:] + itemIndex++ currentCount = 0 continue } - // nil as char - if c.node == nil { - node.appendChar(c.offset) - currentCount++ - } else if c.node.tokenLength() > 0 { - node.append(c.node) + parsed := c.offset > to + if parsed { currentCount++ } - if c.node != nil && c.node.tokenLength() == 0 || ranges[0][1] >= 0 && currentCount == ranges[0][1] { - items = items[1:] - ranges = ranges[1:] + to = c.offset + + if !parsed || p.ranges[itemIndex][1] >= 0 && currentCount == p.ranges[itemIndex][1] { + itemIndex++ currentCount = 0 } } - // t.Out1("success, items parsed") - - // c.store.set(node.From, p.id, node) for _, includedBy := range p.includedBy { - includedBy.storeIncluded(c, node) + includedBy.storeIncluded(c, from, to) } - c.success(node) - c.include(initialOffset, p.id) + c.success(to) + c.include(from, p.id) } diff --git a/store.go b/store.go index 4e988d2..2df55cd 100644 --- a/store.go +++ b/store.go @@ -3,41 +3,41 @@ package treerack type storeEntry struct { match *idSet noMatch *idSet - nodes []*Node + nodes []int } type store struct { entries []*storeEntry } -func (c *store) get(offset int, id int) (*Node, bool, bool) { +func (c *store) get(offset int, id int) (int, bool, bool) { if len(c.entries) <= offset { - return nil, false, false + return 0, false, false } tc := c.entries[offset] if tc == nil { - return nil, false, false + return 0, false, false } if tc.noMatch.has(id) { - return nil, false, true + return 0, false, true } if !tc.match.has(id) { - return nil, false, false + return 0, false, false } - for _, n := range tc.nodes { - if n.id == id { - return n, true, true + for i := 0; i < len(tc.nodes); i += 2 { + if tc.nodes[i] == id { + return tc.nodes[i + 1], true, true } } - return nil, false, false + return 0, false, false } -func (c *store) set(offset int, id int, n *Node) { +func (c *store) set(offset int, id int, match bool, to int) { if len(c.entries) <= offset { if cap(c.entries) > offset { c.entries = c.entries[:offset+1] @@ -59,7 +59,7 @@ func (c *store) set(offset int, id int, n *Node) { c.entries[offset] = tc } - if n == nil { + if !match { if tc.match.has(id) { return } @@ -69,17 +69,17 @@ func (c *store) set(offset int, id int, n *Node) { } tc.match.set(id) - for i, ni := range tc.nodes { - if ni.id == id { - if n.tokenLength() > ni.tokenLength() { - tc.nodes[i] = n + for i := 0; i < len(tc.nodes); i += 2 { + if tc.nodes[i] == id { + if to > tc.nodes[i + 1] { + tc.nodes[i + 1] = to } return } } - tc.nodes = append(tc.nodes, n) + tc.nodes = append(tc.nodes, id, to) } /* diff --git a/syntax.parser b/syntax.parser index 5bb8d8b..7c3fd86 100644 --- a/syntax.parser +++ b/syntax.parser @@ -63,4 +63,4 @@ flag:alias = alias | doc | root; definition = symbol (":" flag)* wsc* "=" wsc* expression; definitions:alias = definition (wsc* ";" (wsc | ";")* definition)*; -syntax:root = (wsc | ";")* definitions? (wsc | ";")*; +syntax:root = (wsc | ";")* definitions? (wsc | ";")*; diff --git a/test.mml b/test.mml index b769713..713e221 100644 --- a/test.mml +++ b/test.mml @@ -48,7 +48,7 @@ fn prevNode(compare, node, value) { fn findNode(compare, node, value) { switch { - case node == empty: false + case node == empty: false case node.value == value: true case compare(node.value, value) < 0: findNode(compare, node.greater, value) @@ -69,8 +69,8 @@ fn delNode(compare, node, value) { fn delCurrent() { match node { case {less, greater}: delBetween() - case {less}: node.less - case {greater}: node.greater + case {less}: node.less + case {greater}: node.greater } } @@ -134,7 +134,7 @@ fn insertNode(compare, node, value) { case empty: {value: value} case c > 0: insertGreater() -> balance() case c < 0: insertLess() -> balance() - default: node + default: node } } From ea01f81ef0f0472347e46807c39a0b1e9ed13848 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Mon, 17 Jul 2017 21:58:03 +0200 Subject: [PATCH 02/15] store sequences --- choice.go | 6 +- context.go | 2 +- sequence.go | 8 +- store.go | 288 +++++++++++++--------------------------------------- syntax.go | 2 +- 5 files changed, 81 insertions(+), 225 deletions(-) diff --git a/choice.go b/choice.go index 906af4d..c0f3d2a 100644 --- a/choice.go +++ b/choice.go @@ -90,7 +90,7 @@ func (p *choiceParser) storeIncluded(c *context, from, to int) { return } - c.store.set(from, p.id, true, to) + c.store.setMatch(from, p.id, to) for _, includedBy := range p.includedBy { includedBy.storeIncluded(c, from, to) @@ -136,7 +136,7 @@ func (p *choiceParser) parse(t Trace, c *context) { foundMatch = true to = nextTo - c.store.set(from, p.id, true, to) + c.store.setMatch(from, p.id, to) for _, includedBy := range p.includedBy { includedBy.storeIncluded(c, from, to) } @@ -153,7 +153,7 @@ func (p *choiceParser) parse(t Trace, c *context) { return } - c.store.set(from, p.id, false, 0) + c.store.setNoMatch(from, p.id) c.fail(from) c.include(from, p.id) } diff --git a/context.go b/context.go index 9c8f30b..137f9c8 100644 --- a/context.go +++ b/context.go @@ -105,7 +105,7 @@ func (c *context) include(offset int, id int) { } func (c *context) fromStore(id int) (bool, bool) { - to, m, ok := c.store.get(c.offset, id) + to, m, ok := c.store.getMatch(c.offset, id) if !ok { return false, false } diff --git a/sequence.go b/sequence.go index ab54ec3..9f179c2 100644 --- a/sequence.go +++ b/sequence.go @@ -111,7 +111,7 @@ func (p *sequenceParser) storeIncluded(c *context, from, to int) { return } - c.store.set(from, p.id, true, to) + c.store.setMatch(from, p.id, to) for _, includedBy := range p.includedBy { includedBy.storeIncluded(c, from, to) @@ -129,6 +129,10 @@ func (p *sequenceParser) parse(t Trace, c *context) { return } + if c.store.hasNoMatch(c.offset, p.id) { + c.fail(c.offset) + } + c.exclude(c.offset, p.id) itemIndex := 0 @@ -140,6 +144,7 @@ func (p *sequenceParser) parse(t Trace, c *context) { p.items[itemIndex].parse(t, c) if !c.match { if currentCount < p.ranges[itemIndex][0] { + c.store.setNoMatch(from, p.id) c.fail(from) c.include(from, p.id) return @@ -167,6 +172,7 @@ func (p *sequenceParser) parse(t Trace, c *context) { includedBy.storeIncluded(c, from, to) } + c.store.setMatch(from, p.id, to) c.success(to) c.include(from, p.id) } diff --git a/store.go b/store.go index 2df55cd..557e377 100644 --- a/store.go +++ b/store.go @@ -3,268 +3,118 @@ package treerack type storeEntry struct { match *idSet noMatch *idSet - nodes []int + matches []int + all []int } type store struct { entries []*storeEntry } -func (c *store) get(offset int, id int) (int, bool, bool) { - if len(c.entries) <= offset { +func (s *store) getEntry(offset int) *storeEntry { + if len(s.entries) <= offset { + return nil + } + + return s.entries[offset] +} + +func (s *store) hasNoMatch(offset, id int) bool { + e := s.getEntry(offset) + if e == nil { + return false + } + + return e.noMatch.has(id) +} + +func (s *store) getMatch(offset, id int) (int, bool, bool) { + e := s.getEntry(offset) + if e == nil { return 0, false, false } - tc := c.entries[offset] - if tc == nil { - return 0, false, false - } - - if tc.noMatch.has(id) { + if e.noMatch.has(id) { return 0, false, true } - if !tc.match.has(id) { + if !e.match.has(id) { return 0, false, false } - for i := 0; i < len(tc.nodes); i += 2 { - if tc.nodes[i] == id { - return tc.nodes[i + 1], true, true + for i := 0; i < len(e.matches); i += 2 { + if e.matches[i] == id { + return e.matches[i + 1], true, true } } return 0, false, false } -func (c *store) set(offset int, id int, match bool, to int) { - if len(c.entries) <= offset { - if cap(c.entries) > offset { - c.entries = c.entries[:offset+1] - } else { - c.entries = c.entries[:cap(c.entries)] - for len(c.entries) <= offset { - c.entries = append(c.entries, nil) - } - } - } - - tc := c.entries[offset] - if tc == nil { - tc = &storeEntry{ - match: &idSet{}, - noMatch: &idSet{}, - } - - c.entries[offset] = tc - } - - if !match { - if tc.match.has(id) { - return - } - - tc.noMatch.set(id) +func (s *store) ensureOffset(offset int) { + if len(s.entries) > offset { return } - tc.match.set(id) - for i := 0; i < len(tc.nodes); i += 2 { - if tc.nodes[i] == id { - if to > tc.nodes[i + 1] { - tc.nodes[i + 1] = to - } - - return - } + if cap(s.entries) > offset { + s.entries = s.entries[:offset+1] + return } - tc.nodes = append(tc.nodes, id, to) -} - -/* - -[][][]int - -id, length, where to start in the underlying layer, which list in the layer - -attibutes: -- sequence: length, the items in the layer below -- choice: the item below - -features: -- there can be sequences or choices under choices - -in every position: -- store the valid choices with the underlying parsed nodes - -3D table: layer, choice, sequence -stored choice identified by: offset, layer, choice index - -*/ - -func (c *store) inc() { -} - -func (c *store) dec() { -} - -func (c *store) get2(offset, id int) (int, bool) { - return 0, false -} - -func (c *store) cache(offset, id int, match bool, length int) { -} - -func (c *store) set2(offset, id int, match bool, length int) { - /* - c.cache(offset, id, match, length) - levels := c.offsetLevels[offset] - levels[c.currentLevel] = id - */ -} - -/* -package treerack - -type storeEntry struct { - match *idSet - noMatch *idSet - nodes []*Node - offset int -} - -type store struct { - current *storeEntry - currentIndex int - entries []*storeEntry -} - -func (s *store) find(offset int) *storeEntry { - if s.current == nil { - return nil - } - - var seekPrev, seekNext bool - for { - switch { - case s.current.offset == offset: - return s.current - case s.current.offset < offset: - if seekPrev { - return nil - } - - seekNext = true - s.currentIndex++ - if s.currentIndex == len(s.entries) { - s.currentIndex = len(s.entries) - 1 - return nil - } - - s.current = s.entries[s.currentIndex] - case s.current.offset > offset: - if seekNext { - return nil - } - - seekPrev = true - s.currentIndex-- - if s.currentIndex == -1 { - s.currentIndex = 0 - return nil - } - - s.current = s.entries[s.currentIndex] - } + s.entries = s.entries[:cap(s.entries)] + for len(s.entries) <= offset { + s.entries = append(s.entries, nil) } } -func (s *store) findCreate(offset int) *storeEntry { - entry := s.find(offset) - if entry != nil { - return entry +func (s *store) ensureEntry(offset int) *storeEntry { + s.ensureOffset(offset) + e := s.entries[offset] + if e != nil { + return e } - entry = &storeEntry{ + e = &storeEntry{ match: &idSet{}, noMatch: &idSet{}, - offset: offset, } - switch { - case s.current != nil && s.current.offset > offset: - s.entries = append( - s.entries[:s.currentIndex], - append([]*storeEntry{entry}, s.entries[s.currentIndex:]...)..., - ) - s.current = entry - case s.current != nil && s.current.offset < offset: - s.entries = append( - s.entries[:s.currentIndex + 1], - append([]*storeEntry{entry}, s.entries[s.currentIndex + 1:]...)..., - ) - s.current = entry - s.currentIndex++ - default: - s.entries = []*storeEntry{entry} - s.current = entry - s.currentIndex = 0 - } - - return entry + s.entries[offset] = e + return e } -func (s *store) get(offset int, id int) (*Node, bool, bool) { - entry := s.find(offset) - if entry == nil { - return nil, false, false - } +func (s *store) setMatch(offset, id, to int) { + e := s.ensureEntry(offset) - if entry == nil { - return nil, false, false - } + e.match.set(id) + for i := 0; i < len(e.matches); i += 2 { + if e.matches[i] == id { + if to > e.matches[i + 1] { + e.matches[i + 1] = to + } - if entry.noMatch.has(id) { - return nil, false, true - } - - if !entry.match.has(id) { - return nil, false, false - } - - for _, n := range entry.nodes { - if n.id == id { - return n, true, true - } - } - - return nil, false, false -} - -func (s *store) set(offset int, id int, n *Node) { - entry := s.findCreate(offset) - - if n == nil { - if entry.match.has(id) { - return - } - - entry.noMatch.set(id) - return - } - - entry.match.set(id) - for i, ni := range entry.nodes { - if ni.id == id { - if n.tokenLength() > ni.tokenLength() { - entry.nodes[i] = n + if to != e.matches[i + 1] { + e.all = append(e.all, id, to) } return } } - entry.nodes = append(entry.nodes, n) + e.matches = append(e.matches, id, to) +} + +func (s *store) setNoMatch(offset, id int) { + e := s.ensureEntry(offset) + + if e.match.has(id) { + return + } + + e.noMatch.set(id) +} + +func (s *store) add(offset, id, to int) { + e := s.ensureEntry(offset) + e.all = append(e.all, id, to) } -*/ diff --git a/syntax.go b/syntax.go index a2bdb56..22cef17 100644 --- a/syntax.go +++ b/syntax.go @@ -162,7 +162,7 @@ func (s *Syntax) Generate(w io.Writer) error { return ErrNotImplemented } -// TODO: optimize top sequences to save memory +// TODO: optimize top sequences to save memory, or just support streaming, or combine the two func (s *Syntax) Parse(r io.Reader) (*Node, error) { if err := s.Init(); err != nil { From b9f05bed7d2f7669044b6b2c234a3ea294588ff3 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Mon, 17 Jul 2017 23:59:26 +0200 Subject: [PATCH 03/15] simplify storage --- parse.go | 2 +- store.go | 120 +++++++++++++++++++------------------------------------ 2 files changed, 42 insertions(+), 80 deletions(-) diff --git a/parse.go b/parse.go index e4bbc5c..91bb52d 100644 --- a/parse.go +++ b/parse.go @@ -15,7 +15,7 @@ type parser interface { nodeName() string nodeID() int setIncludedBy(parser, *idSet) - storeIncluded(*context, int, int) + storeIncluded(*context, int, int) // can be just an id set, taking what's excluded from the context parse(Trace, *context) } diff --git a/store.go b/store.go index 557e377..36a7458 100644 --- a/store.go +++ b/store.go @@ -1,120 +1,82 @@ package treerack -type storeEntry struct { - match *idSet - noMatch *idSet - matches []int - all []int -} - type store struct { - entries []*storeEntry -} - -func (s *store) getEntry(offset int) *storeEntry { - if len(s.entries) <= offset { - return nil - } - - return s.entries[offset] + noMatch []*idSet + match [][]int } func (s *store) hasNoMatch(offset, id int) bool { - e := s.getEntry(offset) - if e == nil { + if len(s.noMatch) <= offset || s.noMatch[offset] == nil { return false } - return e.noMatch.has(id) + return s.noMatch[offset].has(id) } func (s *store) getMatch(offset, id int) (int, bool, bool) { - e := s.getEntry(offset) - if e == nil { - return 0, false, false - } - - if e.noMatch.has(id) { + if s.hasNoMatch(offset, id) { return 0, false, true } - if !e.match.has(id) { + if len(s.match) <= offset { return 0, false, false } - for i := 0; i < len(e.matches); i += 2 { - if e.matches[i] == id { - return e.matches[i + 1], true, true + var ( + found bool + length int + ) + + for i := 0; i < len(s.match[offset]); i++ { + if s.match[offset][i] != id { + continue + } + + found = true + if s.match[offset][i + 1] > length { + length = s.match[offset][i + 1] } } - return 0, false, false + return length, found, found } func (s *store) ensureOffset(offset int) { - if len(s.entries) > offset { + if len(s.match) > offset { return } - if cap(s.entries) > offset { - s.entries = s.entries[:offset+1] + if cap(s.match) > offset { + s.match = s.match[:offset+1] return } - s.entries = s.entries[:cap(s.entries)] - for len(s.entries) <= offset { - s.entries = append(s.entries, nil) + s.match = s.match[:cap(s.match)] + for i := len(s.match); i <= offset; i++ { + s.match = append(s.match, nil) } } -func (s *store) ensureEntry(offset int) *storeEntry { - s.ensureOffset(offset) - e := s.entries[offset] - if e != nil { - return e - } - - e = &storeEntry{ - match: &idSet{}, - noMatch: &idSet{}, - } - - s.entries[offset] = e - return e -} - func (s *store) setMatch(offset, id, to int) { - e := s.ensureEntry(offset) - - e.match.set(id) - for i := 0; i < len(e.matches); i += 2 { - if e.matches[i] == id { - if to > e.matches[i + 1] { - e.matches[i + 1] = to - } - - if to != e.matches[i + 1] { - e.all = append(e.all, id, to) - } - - return - } - } - - e.matches = append(e.matches, id, to) + s.ensureOffset(offset) + s.match[offset] = append(s.match[offset], id, to) } func (s *store) setNoMatch(offset, id int) { - e := s.ensureEntry(offset) - - if e.match.has(id) { - return + if len(s.noMatch) <= offset { + if cap(s.noMatch) > offset { + s.noMatch = s.noMatch[:offset + 1] + } else { + s.noMatch = s.noMatch[:cap(s.noMatch)] + for i := len(s.noMatch); i <= offset; i++ { + s.noMatch = append(s.noMatch, nil) + } + } } - e.noMatch.set(id) -} + if s.noMatch[offset] == nil { + s.noMatch[offset] = &idSet{} + } -func (s *store) add(offset, id, to int) { - e := s.ensureEntry(offset) - e.all = append(e.all, id, to) + s.noMatch[offset].set(id) } From 86d244790aeab1c65d5aca4c4523826ed10c619a Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Tue, 18 Jul 2017 00:38:44 +0200 Subject: [PATCH 04/15] microoptimizations in exclude/include, sequence and choice --- char.go | 2 +- choice.go | 6 ++++-- context.go | 50 ++++++++++++++++++++++++-------------------------- sequence.go | 8 ++++---- store.go | 14 +++++++------- 5 files changed, 40 insertions(+), 40 deletions(-) diff --git a/char.go b/char.go index 2a4099a..3bb0ae6 100644 --- a/char.go +++ b/char.go @@ -83,6 +83,6 @@ func (p *charParser) parse(t Trace, c *context) { c.success(c.offset + 1) for _, includedBy := range p.includedBy { - includedBy.storeIncluded(c, c.offset, c.offset + 1) + includedBy.storeIncluded(c, c.offset, c.offset+1) } } diff --git a/choice.go b/choice.go index c0f3d2a..55df20b 100644 --- a/choice.go +++ b/choice.go @@ -117,15 +117,17 @@ func (p *choiceParser) parse(t Trace, c *context) { to := c.offset var match bool + var nextTo int + var elementIndex int for { - elementIndex := 0 var foundMatch bool + elementIndex = 0 for elementIndex < len(p.elements) { p.elements[elementIndex].parse(t, c) elementIndex++ - nextTo := c.offset + nextTo = c.offset c.offset = from if !c.match || match && nextTo <= to { diff --git a/context.go b/context.go index 137f9c8..150e7e7 100644 --- a/context.go +++ b/context.go @@ -15,7 +15,7 @@ type context struct { tokens []rune match bool node *Node - isExcluded []*idSet + isExcluded [][]int } func newContext(r io.RuneReader) *context { @@ -65,43 +65,41 @@ func (c *context) token() (rune, bool) { } func (c *context) excluded(offset int, id int) bool { - if len(c.isExcluded) <= offset || c.isExcluded[offset] == nil { + if len(c.isExcluded) <= id { return false } - return c.isExcluded[offset].has(id) -} - -func (c *context) exclude(offset int, id int) { - if c.excluded(offset, id) { - return - } - - if len(c.isExcluded) <= offset { - c.isExcluded = append(c.isExcluded, nil) - if cap(c.isExcluded) > offset { - c.isExcluded = c.isExcluded[:offset+1] - } else { - c.isExcluded = append( - c.isExcluded[:cap(c.isExcluded)], - make([]*idSet, offset+1-cap(c.isExcluded))..., - ) + for i := range c.isExcluded[id] { + if c.isExcluded[id][i] == offset { + return true } } - if c.isExcluded[offset] == nil { - c.isExcluded[offset] = &idSet{} + return false +} + +func (c *context) exclude(offset int, id int) { + if len(c.isExcluded) <= id { + if cap(c.isExcluded) > id { + c.isExcluded = c.isExcluded[:id+1] + } else { + c.isExcluded = c.isExcluded[:cap(c.isExcluded)] + for i := cap(c.isExcluded); i <= id; i++ { + c.isExcluded = append(c.isExcluded, nil) + } + } } - c.isExcluded[offset].set(id) + c.isExcluded[id] = append(c.isExcluded[id], offset) } func (c *context) include(offset int, id int) { - if len(c.isExcluded) <= offset || c.isExcluded[offset] == nil { - return + for i := range c.isExcluded[id] { + if c.isExcluded[id][i] == offset { + c.isExcluded[id] = append(c.isExcluded[id][:i], c.isExcluded[id][i+1:]...) + break + } } - - c.isExcluded[offset].unset(id) } func (c *context) fromStore(id int) (bool, bool) { diff --git a/sequence.go b/sequence.go index 9f179c2..eecaa1d 100644 --- a/sequence.go +++ b/sequence.go @@ -129,9 +129,9 @@ func (p *sequenceParser) parse(t Trace, c *context) { return } - if c.store.hasNoMatch(c.offset, p.id) { - c.fail(c.offset) - } + // if c.store.hasNoMatch(c.offset, p.id) { + // c.fail(c.offset) + // } c.exclude(c.offset, p.id) @@ -144,7 +144,7 @@ func (p *sequenceParser) parse(t Trace, c *context) { p.items[itemIndex].parse(t, c) if !c.match { if currentCount < p.ranges[itemIndex][0] { - c.store.setNoMatch(from, p.id) + // c.store.setNoMatch(from, p.id) c.fail(from) c.include(from, p.id) return diff --git a/store.go b/store.go index 36a7458..cd0e3e0 100644 --- a/store.go +++ b/store.go @@ -2,7 +2,7 @@ package treerack type store struct { noMatch []*idSet - match [][]int + match [][]int } func (s *store) hasNoMatch(offset, id int) bool { @@ -23,7 +23,7 @@ func (s *store) getMatch(offset, id int) (int, bool, bool) { } var ( - found bool + found bool length int ) @@ -33,8 +33,8 @@ func (s *store) getMatch(offset, id int) (int, bool, bool) { } found = true - if s.match[offset][i + 1] > length { - length = s.match[offset][i + 1] + if s.match[offset][i+1] > length { + length = s.match[offset][i+1] } } @@ -52,7 +52,7 @@ func (s *store) ensureOffset(offset int) { } s.match = s.match[:cap(s.match)] - for i := len(s.match); i <= offset; i++ { + for i := cap(s.match); i <= offset; i++ { s.match = append(s.match, nil) } } @@ -65,10 +65,10 @@ func (s *store) setMatch(offset, id, to int) { func (s *store) setNoMatch(offset, id int) { if len(s.noMatch) <= offset { if cap(s.noMatch) > offset { - s.noMatch = s.noMatch[:offset + 1] + s.noMatch = s.noMatch[:offset+1] } else { s.noMatch = s.noMatch[:cap(s.noMatch)] - for i := len(s.noMatch); i <= offset; i++ { + for i := cap(s.noMatch); i <= offset; i++ { s.noMatch = append(s.noMatch, nil) } } From 6f8ffa81610d3d74516bd3345b3a5801af5a3dfb Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Wed, 26 Jul 2017 18:06:11 +0200 Subject: [PATCH 05/15] fix step --- store.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/store.go b/store.go index cd0e3e0..d3d90ce 100644 --- a/store.go +++ b/store.go @@ -27,7 +27,7 @@ func (s *store) getMatch(offset, id int) (int, bool, bool) { length int ) - for i := 0; i < len(s.match[offset]); i++ { + for i := 0; i < len(s.match[offset]); i += 2 { if s.match[offset][i] != id { continue } From 507299f31821118d3168ff9ee353c26424130af8 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Wed, 26 Jul 2017 20:03:46 +0200 Subject: [PATCH 06/15] measure multiple parse average --- boot_test.go | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/boot_test.go b/boot_test.go index 3d24635..7bde6c6 100644 --- a/boot_test.go +++ b/boot_test.go @@ -21,14 +21,26 @@ func TestBoot(t *testing.T) { defer f.Close() - start := time.Now() - _, err = b.Parse(f) - t.Log("duration:", time.Now().Sub(start)) + var d time.Duration + const n = 120 + for i := 0; i < n; i++ { + if _, err := f.Seek(0, 0); err != nil { + t.Error(err) + return + } - if err != ErrNotImplemented { - t.Error(err) + start := time.Now() + _, err = b.Parse(f) + d += time.Now().Sub(start) + + if err != ErrNotImplemented { + t.Error(err) + return + } } + t.Log("duration:", d/n) + // if err != nil { // t.Error(err) // return From 72c8418e45bb16f3391caa96ec0820fc37572a07 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Thu, 27 Jul 2017 01:48:16 +0200 Subject: [PATCH 07/15] refactore included --- char.go | 19 ++++++------- choice.go | 58 ++++++++++++++++++-------------------- parse.go | 44 +++++++++++++++++++++++++++-- sequence.go | 81 +++++++++++++++++++++++++++++++---------------------- syntax.go | 4 +++ 5 files changed, 129 insertions(+), 77 deletions(-) diff --git a/char.go b/char.go index 3bb0ae6..24dfd9a 100644 --- a/char.go +++ b/char.go @@ -7,7 +7,7 @@ type charParser struct { not bool chars []rune ranges [][]rune - includedBy []parser + includedBy []int } func newChar( @@ -30,6 +30,13 @@ func (p *charParser) nodeName() string { return p.name } func (p *charParser) nodeID() int { return p.id } func (p *charParser) setID(id int) { p.id = id } +func (p *charParser) init(r *registry) error { return nil } + +func (p *charParser) setIncludedBy(r *registry, includedBy int, parsers *idSet) error { + p.includedBy = appendIfMissing(p.includedBy, includedBy) + return nil +} + func (p *charParser) parser(r *registry, parsers *idSet) (parser, error) { if parsers.has(p.id) { panic(cannotIncludeParsers(p.name)) @@ -47,14 +54,6 @@ func (p *charParser) commitType() CommitType { return p.commit } -func (p *charParser) setIncludedBy(includedBy parser, parsers *idSet) { - if parsers.has(p.id) { - panic(cannotIncludeParsers(p.name)) - } - - p.includedBy = append(p.includedBy, includedBy) -} - func (p *charParser) storeIncluded(*context, int, int) { panic(cannotIncludeParsers(p.name)) } @@ -83,6 +82,6 @@ func (p *charParser) parse(t Trace, c *context) { c.success(c.offset + 1) for _, includedBy := range p.includedBy { - includedBy.storeIncluded(c, c.offset, c.offset+1) + c.store.setMatch(c.offset, includedBy, c.offset+1) } } diff --git a/choice.go b/choice.go index 55df20b..a0f9ce0 100644 --- a/choice.go +++ b/choice.go @@ -1,10 +1,11 @@ package treerack type choiceDefinition struct { - name string - id int - commit CommitType - elements []string + name string + id int + commit CommitType + elements []string + includedBy []int } type choiceParser struct { @@ -12,7 +13,7 @@ type choiceParser struct { id int commit CommitType elements []parser - includedBy []parser + includedBy []int } func newChoice(name string, ct CommitType, elements []string) *choiceDefinition { @@ -27,6 +28,22 @@ func (d *choiceDefinition) nodeName() string { return d.name } func (d *choiceDefinition) nodeID() int { return d.id } func (d *choiceDefinition) setID(id int) { d.id = id } +func (d *choiceDefinition) init(r *registry) error { + parsers := &idSet{} + parsers.set(d.id) + return setItemsIncludedBy(r, d.elements, d.id, parsers) +} + +func (d *choiceDefinition) setIncludedBy(r *registry, includedBy int, parsers *idSet) error { + if parsers.has(d.id) { + return nil + } + + d.includedBy = appendIfMissing(d.includedBy, includedBy) + parsers.set(d.id) + return setItemsIncludedBy(r, d.elements, includedBy, parsers) +} + func (d *choiceDefinition) parser(r *registry, parsers *idSet) (parser, error) { p, ok := r.parser(d.name) if ok { @@ -34,9 +51,10 @@ func (d *choiceDefinition) parser(r *registry, parsers *idSet) (parser, error) { } cp := &choiceParser{ - name: d.name, - id: d.id, - commit: d.commit, + name: d.name, + id: d.id, + commit: d.commit, + includedBy: d.includedBy, } r.setParser(cp) @@ -48,7 +66,6 @@ func (d *choiceDefinition) parser(r *registry, parsers *idSet) (parser, error) { element, ok := r.parser(e) if ok { elements = append(elements, element) - element.setIncludedBy(cp, parsers) continue } @@ -62,7 +79,6 @@ func (d *choiceDefinition) parser(r *registry, parsers *idSet) (parser, error) { return nil, err } - element.setIncludedBy(cp, parsers) elements = append(elements, element) } @@ -77,26 +93,6 @@ func (d *choiceDefinition) commitType() CommitType { func (p *choiceParser) nodeName() string { return p.name } func (p *choiceParser) nodeID() int { return p.id } -func (p *choiceParser) setIncludedBy(includedBy parser, parsers *idSet) { - if parsers.has(p.id) { - return - } - - p.includedBy = append(p.includedBy, includedBy) -} - -func (p *choiceParser) storeIncluded(c *context, from, to int) { - if !c.excluded(from, p.id) { - return - } - - c.store.setMatch(from, p.id, to) - - for _, includedBy := range p.includedBy { - includedBy.storeIncluded(c, from, to) - } -} - func (p *choiceParser) parse(t Trace, c *context) { if p.commit&Documentation != 0 { c.fail(c.offset) @@ -140,7 +136,7 @@ func (p *choiceParser) parse(t Trace, c *context) { c.store.setMatch(from, p.id, to) for _, includedBy := range p.includedBy { - includedBy.storeIncluded(c, from, to) + c.store.setMatch(from, includedBy, to) } } diff --git a/parse.go b/parse.go index 91bb52d..d7d859d 100644 --- a/parse.go +++ b/parse.go @@ -6,6 +6,8 @@ type definition interface { nodeName() string nodeID() int setID(int) + init(*registry) error + setIncludedBy(*registry, int, *idSet) error parser(*registry, *idSet) (parser, error) commitType() CommitType // builder() builder @@ -14,8 +16,6 @@ type definition interface { type parser interface { nodeName() string nodeID() int - setIncludedBy(parser, *idSet) - storeIncluded(*context, int, int) // can be just an id set, taking what's excluded from the context parse(Trace, *context) } @@ -33,6 +33,46 @@ func cannotIncludeParsers(name string) error { return fmt.Errorf("parser: %s cannot include other parsers", name) } +func intsContain(is []int, i int) bool { + for _, ii := range is { + if ii == i { + return true + } + } + + return false +} + +func appendIfMissing(is []int, i int) []int { + if intsContain(is, i) { + return is + } + + return append(is, i) +} + +func setItemsIncludedBy(r *registry, items []string, includedBy int, parsers *idSet) error { + for _, item := range items { + di, ok := r.definition(item) + if !ok { + return ErrNoParsersDefined + } + + di.setIncludedBy(r, includedBy, parsers) + } + + return nil +} + +func sequenceItemNames(items []SequenceItem) []string { + names := make([]string, len(items)) + for i := range items { + names[i] = items[i].Name + } + + return names +} + func parse(t Trace, p parser, c *context) (*Node, error) { p.parse(t, c) if c.readErr != nil { diff --git a/sequence.go b/sequence.go index eecaa1d..c30582a 100644 --- a/sequence.go +++ b/sequence.go @@ -1,10 +1,11 @@ package treerack type sequenceDefinition struct { - name string - id int - commit CommitType - items []SequenceItem + name string + id int + commit CommitType + items []SequenceItem + includedBy []int } type sequenceParser struct { @@ -13,7 +14,7 @@ type sequenceParser struct { commit CommitType items []parser ranges [][]int - includedBy []parser + includedBy []int } func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefinition { @@ -28,6 +29,42 @@ func (d *sequenceDefinition) nodeName() string { return d.name } func (d *sequenceDefinition) nodeID() int { return d.id } func (d *sequenceDefinition) setID(id int) { d.id = id } +func (d *sequenceDefinition) includeItems() bool { + return len(d.items) == 1 && d.items[0].Min == 1 && d.items[0].Max == 1 +} + +func (d *sequenceDefinition) init(r *registry) error { + for _, item := range d.items { + if item.Min == 0 && item.Max == 0 { + item.Min, item.Max = 1, 1 + } else if item.Max == 0 { + item.Max = -1 + } + } + + if !d.includeItems() { + return nil + } + + parsers := &idSet{} + parsers.set(d.id) + return setItemsIncludedBy(r, sequenceItemNames(d.items), d.id, parsers) +} + +func (d *sequenceDefinition) setIncludedBy(r *registry, includedBy int, parsers *idSet) error { + if parsers.has(d.id) { + return nil + } + + d.includedBy = appendIfMissing(d.includedBy, includedBy) + if !d.includeItems() { + return nil + } + + parsers.set(d.id) + return setItemsIncludedBy(r, sequenceItemNames(d.items), includedBy, parsers) +} + func (d *sequenceDefinition) parser(r *registry, parsers *idSet) (parser, error) { if parsers.has(d.id) { panic(cannotIncludeParsers(d.name)) @@ -39,9 +76,10 @@ func (d *sequenceDefinition) parser(r *registry, parsers *idSet) (parser, error) } sp := &sequenceParser{ - name: d.name, - id: d.id, - commit: d.commit, + name: d.name, + id: d.id, + commit: d.commit, + includedBy: d.includedBy, } r.setParser(sp) @@ -81,11 +119,6 @@ func (d *sequenceDefinition) parser(r *registry, parsers *idSet) (parser, error) ranges = append(ranges, []int{item.Min, item.Max}) } - // for single items, acts like a choice - if len(items) == 1 && ranges[0][0] == 1 && ranges[0][1] == 1 { - items[0].setIncludedBy(sp, parsers) - } - sp.items = items sp.ranges = ranges return sp, nil @@ -98,26 +131,6 @@ func (d *sequenceDefinition) commitType() CommitType { func (p *sequenceParser) nodeName() string { return p.name } func (p *sequenceParser) nodeID() int { return p.id } -func (p *sequenceParser) setIncludedBy(includedBy parser, parsers *idSet) { - if parsers.has(p.id) { - return - } - - p.includedBy = append(p.includedBy, includedBy) -} - -func (p *sequenceParser) storeIncluded(c *context, from, to int) { - if !c.excluded(from, p.id) { - return - } - - c.store.setMatch(from, p.id, to) - - for _, includedBy := range p.includedBy { - includedBy.storeIncluded(c, from, to) - } -} - func (p *sequenceParser) parse(t Trace, c *context) { if p.commit&Documentation != 0 { c.fail(c.offset) @@ -169,7 +182,7 @@ func (p *sequenceParser) parse(t Trace, c *context) { } for _, includedBy := range p.includedBy { - includedBy.storeIncluded(c, from, to) + c.store.setMatch(from, includedBy, to) } c.store.setMatch(from, p.id, to) diff --git a/syntax.go b/syntax.go index 22cef17..424b6b3 100644 --- a/syntax.go +++ b/syntax.go @@ -143,6 +143,10 @@ func (s *Syntax) Init() error { return ErrRootAlias } + for _, p := range s.registry.definitions { + p.init(s.registry) + } + var err error s.parser, err = s.root.parser(s.registry, &idSet{}) if err != nil { From f6761a7e3d654b0cbd5c38f81a3721e4e4c4a306 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Thu, 27 Jul 2017 02:18:19 +0200 Subject: [PATCH 08/15] fix storage allocation --- store.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/store.go b/store.go index d3d90ce..515a324 100644 --- a/store.go +++ b/store.go @@ -52,7 +52,7 @@ func (s *store) ensureOffset(offset int) { } s.match = s.match[:cap(s.match)] - for i := cap(s.match); i <= offset; i++ { + for i := len(s.match); i <= offset; i++ { s.match = append(s.match, nil) } } From 1e7c0e8c2897992b5a0864d21fd5b6f7c0a3ef69 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Sat, 29 Jul 2017 16:25:17 +0200 Subject: [PATCH 09/15] add builder definitions --- char.go | 27 ++++++++++++++++++--------- choice.go | 22 ++++++++++++++++++++-- context.go | 10 ++++++---- parse.go | 8 ++++---- sequence.go | 19 +++++++++++++++++-- store.go | 4 ++++ syntax.go | 4 ++-- 7 files changed, 71 insertions(+), 23 deletions(-) diff --git a/char.go b/char.go index 24dfd9a..4f588dd 100644 --- a/char.go +++ b/char.go @@ -3,7 +3,6 @@ package treerack type charParser struct { name string id int - commit CommitType not bool chars []rune ranges [][]rune @@ -12,14 +11,12 @@ type charParser struct { func newChar( name string, - ct CommitType, not bool, chars []rune, ranges [][]rune, ) *charParser { return &charParser{ name: name, - commit: ct, not: not, chars: chars, ranges: ranges, @@ -29,6 +26,7 @@ func newChar( func (p *charParser) nodeName() string { return p.name } func (p *charParser) nodeID() int { return p.id } func (p *charParser) setID(id int) { p.id = id } +func (p *charParser) commitType() CommitType { return Alias } func (p *charParser) init(r *registry) error { return nil } @@ -50,12 +48,8 @@ func (p *charParser) parser(r *registry, parsers *idSet) (parser, error) { return p, nil } -func (p *charParser) commitType() CommitType { - return p.commit -} - -func (p *charParser) storeIncluded(*context, int, int) { - panic(cannotIncludeParsers(p.name)) +func (p *charParser) builder() builder { + return p } func (p *charParser) match(t rune) bool { @@ -85,3 +79,18 @@ func (p *charParser) parse(t Trace, c *context) { c.store.setMatch(c.offset, includedBy, c.offset+1) } } + +func (p *charParser) build(c *context) ([]*Node, bool) { + t, ok := c.token() + if !ok { + panic("damaged parser context") + } + + if !p.match(t) { + return nil, false + } + + // always alias + c.offset++ + return nil, true +} diff --git a/choice.go b/choice.go index a0f9ce0..4377cf0 100644 --- a/choice.go +++ b/choice.go @@ -16,6 +16,12 @@ type choiceParser struct { includedBy []int } +type choiceBuilder struct { + name string + id int + commit CommitType +} + func newChoice(name string, ct CommitType, elements []string) *choiceDefinition { return &choiceDefinition{ name: name, @@ -27,6 +33,7 @@ func newChoice(name string, ct CommitType, elements []string) *choiceDefinition func (d *choiceDefinition) nodeName() string { return d.name } func (d *choiceDefinition) nodeID() int { return d.id } func (d *choiceDefinition) setID(id int) { d.id = id } +func (d *choiceDefinition) commitType() CommitType { return d.commit } func (d *choiceDefinition) init(r *registry) error { parsers := &idSet{} @@ -44,6 +51,10 @@ func (d *choiceDefinition) setIncludedBy(r *registry, includedBy int, parsers *i return setItemsIncludedBy(r, d.elements, includedBy, parsers) } +// TODO: +// - it may be possible to initialize the parsers non-recursively +// - maybe the whole definition, parser and builder can be united + func (d *choiceDefinition) parser(r *registry, parsers *idSet) (parser, error) { p, ok := r.parser(d.name) if ok { @@ -86,8 +97,8 @@ func (d *choiceDefinition) parser(r *registry, parsers *idSet) (parser, error) { return cp, nil } -func (d *choiceDefinition) commitType() CommitType { - return d.commit +func (d *choiceDefinition) builder() builder { + return &choiceBuilder{} } func (p *choiceParser) nodeName() string { return p.name } @@ -155,3 +166,10 @@ func (p *choiceParser) parse(t Trace, c *context) { c.fail(from) c.include(from, p.id) } + +func (b *choiceBuilder) nodeName() string { return b.name } +func (b *choiceBuilder) nodeID() int { return b.id } + +func (b *choiceBuilder) build(*context) ([]*Node, bool) { + return nil, false +} diff --git a/context.go b/context.go index 150e7e7..10bdb3f 100644 --- a/context.go +++ b/context.go @@ -127,10 +127,10 @@ func (c *context) fail(offset int) { c.match = false } -func (c *context) finalize() error { - return ErrNotImplemented - - if c.node.To < c.readOffset { +func (c *context) finalize(root parser) error { + rootID := root.nodeID() + to, match, found := c.store.getMatch(0, rootID) + if !found || !match || to < c.readOffset { return ErrUnexpectedCharacter } @@ -145,6 +145,8 @@ func (c *context) finalize() error { } } + return ErrNotImplemented + c.node.commit(c.tokens) return nil } diff --git a/parse.go b/parse.go index d7d859d..e646a68 100644 --- a/parse.go +++ b/parse.go @@ -5,12 +5,12 @@ import "fmt" type definition interface { nodeName() string nodeID() int + commitType() CommitType setID(int) init(*registry) error setIncludedBy(*registry, int, *idSet) error parser(*registry, *idSet) (parser, error) - commitType() CommitType - // builder() builder + builder() builder } type parser interface { @@ -22,7 +22,7 @@ type parser interface { type builder interface { nodeName() string nodeID() int - build(*context) *Node + build(*context) ([]*Node, bool) } func parserNotFound(name string) error { @@ -83,7 +83,7 @@ func parse(t Trace, p parser, c *context) (*Node, error) { return nil, ErrInvalidInput } - if err := c.finalize(); err != nil { + if err := c.finalize(p); err != nil { return nil, err } diff --git a/sequence.go b/sequence.go index c30582a..4daf082 100644 --- a/sequence.go +++ b/sequence.go @@ -17,6 +17,12 @@ type sequenceParser struct { includedBy []int } +type sequenceBuilder struct { + name string + id int + commit CommitType +} + func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefinition { return &sequenceDefinition{ name: name, @@ -28,6 +34,7 @@ func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefi func (d *sequenceDefinition) nodeName() string { return d.name } func (d *sequenceDefinition) nodeID() int { return d.id } func (d *sequenceDefinition) setID(id int) { d.id = id } +func (d *sequenceDefinition) commitType() CommitType { return d.commit } func (d *sequenceDefinition) includeItems() bool { return len(d.items) == 1 && d.items[0].Min == 1 && d.items[0].Max == 1 @@ -66,6 +73,7 @@ func (d *sequenceDefinition) setIncludedBy(r *registry, includedBy int, parsers } func (d *sequenceDefinition) parser(r *registry, parsers *idSet) (parser, error) { + // TODO: what is this for? test with sequence containing a sequence through a choice if parsers.has(d.id) { panic(cannotIncludeParsers(d.name)) } @@ -124,8 +132,8 @@ func (d *sequenceDefinition) parser(r *registry, parsers *idSet) (parser, error) return sp, nil } -func (d *sequenceDefinition) commitType() CommitType { - return d.commit +func (d *sequenceDefinition) builder() builder { + return &sequenceBuilder{} } func (p *sequenceParser) nodeName() string { return p.name } @@ -189,3 +197,10 @@ func (p *sequenceParser) parse(t Trace, c *context) { c.success(to) c.include(from, p.id) } + +func (b *sequenceBuilder) nodeName() string { return b.name } +func (b *sequenceBuilder) nodeID() int { return b.id } + +func (b *sequenceBuilder) build(*context) ([]*Node, bool) { + return nil, false +} diff --git a/store.go b/store.go index 515a324..dafeb67 100644 --- a/store.go +++ b/store.go @@ -58,6 +58,10 @@ func (s *store) ensureOffset(offset int) { } func (s *store) setMatch(offset, id, to int) { + if toe, match, ok := s.getMatch(offset, id); ok && match && toe == to { + return + } + s.ensureOffset(offset) s.match[offset] = append(s.match[offset], id, to) } diff --git a/syntax.go b/syntax.go index 424b6b3..2e2d6d8 100644 --- a/syntax.go +++ b/syntax.go @@ -88,7 +88,7 @@ func childName(name string, childIndex int) string { func (s *Syntax) Class(name string, ct CommitType, not bool, chars []rune, ranges [][]rune) error { cname := childName(name, 0) - if err := s.register(newChar(cname, Alias, not, chars, ranges)); err != nil { + if err := s.register(newChar(cname, not, chars, ranges)); err != nil { return err } @@ -100,7 +100,7 @@ func (s *Syntax) CharSequence(name string, ct CommitType, chars []rune) error { for i, ci := range chars { ref := childName(name, i) refs = append(refs, ref) - if err := s.register(newChar(ref, Alias, false, []rune{ci}, nil)); err != nil { + if err := s.register(newChar(ref, false, []rune{ci}, nil)); err != nil { return err } } From 73585dd07db65998c53c8668350e74af5b253b57 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Sat, 29 Jul 2017 18:40:22 +0200 Subject: [PATCH 10/15] separate build phase for boot --- boot_test.go | 92 ++++++++++++++++++++---------------------- char.go | 6 +-- choice.go | 80 ++++++++++++++++++++++++++++++++----- context.go | 3 -- parse.go | 20 +++++++--- sequence.go | 111 +++++++++++++++++++++++++++++++++++++++++---------- store.go | 44 +++++++++++++++++--- syntax.go | 8 +++- 8 files changed, 268 insertions(+), 96 deletions(-) diff --git a/boot_test.go b/boot_test.go index 7bde6c6..ff5a192 100644 --- a/boot_test.go +++ b/boot_test.go @@ -13,7 +13,7 @@ func TestBoot(t *testing.T) { return } - f, err := os.Open("mml.parser") + f, err := os.Open("syntax.parser") if err != nil { t.Error(err) return @@ -23,6 +23,7 @@ func TestBoot(t *testing.T) { var d time.Duration const n = 120 + var n0 *Node for i := 0; i < n; i++ { if _, err := f.Seek(0, 0); err != nil { t.Error(err) @@ -30,10 +31,10 @@ func TestBoot(t *testing.T) { } start := time.Now() - _, err = b.Parse(f) + n0, err = b.Parse(f) d += time.Now().Sub(start) - if err != ErrNotImplemented { + if err != nil { t.Error(err) return } @@ -41,57 +42,52 @@ func TestBoot(t *testing.T) { t.Log("duration:", d/n) - // if err != nil { - // t.Error(err) - // return - // } + s0 := NewSyntax() + if err := define(s0, n0); err != nil { + t.Error(err) + return + } - // s0 := NewSyntax() - // if err := define(s0, n0); err != nil { - // t.Error(err) - // return - // } + _, err = f.Seek(0, 0) + if err != nil { + t.Error(err) + return + } - // _, err = f.Seek(0, 0) - // if err != nil { - // t.Error(err) - // return - // } + err = s0.Init() + if err != nil { + t.Error(err) + return + } - // err = s0.Init() - // if err != nil { - // t.Error(err) - // return - // } + n1, err := s0.Parse(f) + if err != nil { + t.Error(err) + return + } - // n1, err := s0.Parse(f) - // if err != nil { - // t.Error(err) - // return - // } + checkNode(t, n1, n0) + if t.Failed() { + return + } - // checkNode(t, n1, n0) - // if t.Failed() { - // return - // } + s1 := NewSyntax() + if err := define(s1, n1); err != nil { + t.Error(err) + return + } - // s1 := NewSyntax() - // if err := define(s1, n1); err != nil { - // t.Error(err) - // return - // } + _, err = f.Seek(0, 0) + if err != nil { + t.Error(err) + return + } - // _, err = f.Seek(0, 0) - // if err != nil { - // t.Error(err) - // return - // } + n2, err := s1.Parse(f) + if err != nil { + t.Error(err) + return + } - // n2, err := s1.Parse(f) - // if err != nil { - // t.Error(err) - // return - // } - - // checkNode(t, n2, n1) + checkNode(t, n2, n1) } diff --git a/char.go b/char.go index 4f588dd..54e562c 100644 --- a/char.go +++ b/char.go @@ -23,9 +23,9 @@ func newChar( } } -func (p *charParser) nodeName() string { return p.name } -func (p *charParser) nodeID() int { return p.id } -func (p *charParser) setID(id int) { p.id = id } +func (p *charParser) nodeName() string { return p.name } +func (p *charParser) nodeID() int { return p.id } +func (p *charParser) setID(id int) { p.id = id } func (p *charParser) commitType() CommitType { return Alias } func (p *charParser) init(r *registry) error { return nil } diff --git a/choice.go b/choice.go index 4377cf0..07e4f88 100644 --- a/choice.go +++ b/choice.go @@ -6,6 +6,7 @@ type choiceDefinition struct { commit CommitType elements []string includedBy []int + cbuilder *choiceBuilder } type choiceParser struct { @@ -17,9 +18,10 @@ type choiceParser struct { } type choiceBuilder struct { - name string - id int - commit CommitType + name string + id int + commit CommitType + elements []builder } func newChoice(name string, ct CommitType, elements []string) *choiceDefinition { @@ -30,12 +32,29 @@ func newChoice(name string, ct CommitType, elements []string) *choiceDefinition } } -func (d *choiceDefinition) nodeName() string { return d.name } -func (d *choiceDefinition) nodeID() int { return d.id } -func (d *choiceDefinition) setID(id int) { d.id = id } +func (d *choiceDefinition) nodeName() string { return d.name } +func (d *choiceDefinition) nodeID() int { return d.id } +func (d *choiceDefinition) setID(id int) { d.id = id } func (d *choiceDefinition) commitType() CommitType { return d.commit } func (d *choiceDefinition) init(r *registry) error { + if d.cbuilder == nil { + d.cbuilder = &choiceBuilder{ + name: d.name, + id: d.id, + commit: d.commit, + } + } + + for _, e := range d.elements { + def, ok := r.definition(e) + if !ok { + return parserNotFound(e) + } + + d.cbuilder.elements = append(d.cbuilder.elements, def.builder()) + } + parsers := &idSet{} parsers.set(d.id) return setItemsIncludedBy(r, d.elements, d.id, parsers) @@ -98,7 +117,15 @@ func (d *choiceDefinition) parser(r *registry, parsers *idSet) (parser, error) { } func (d *choiceDefinition) builder() builder { - return &choiceBuilder{} + if d.cbuilder == nil { + d.cbuilder = &choiceBuilder{ + name: d.name, + id: d.id, + commit: d.commit, + } + } + + return d.cbuilder } func (p *choiceParser) nodeName() string { return p.name } @@ -170,6 +197,41 @@ func (p *choiceParser) parse(t Trace, c *context) { func (b *choiceBuilder) nodeName() string { return b.name } func (b *choiceBuilder) nodeID() int { return b.id } -func (b *choiceBuilder) build(*context) ([]*Node, bool) { - return nil, false +func (b *choiceBuilder) build(c *context) ([]*Node, bool) { + to, ok := c.store.takeMatch(c.offset, b.id) + if !ok { + return nil, false + } + + var element builder + for _, e := range b.elements { + elementTo, match, _ := c.store.getMatch(c.offset, e.nodeID()) + if match && elementTo == to { + element = e + break + } + } + + if element == nil { + panic("damaged parse result") + } + + from := c.offset + + n, ok := element.build(c) + if !ok { + panic("damaged parse result") + } + + if b.commit&Alias != 0 { + return n, true + } + + return []*Node{{ + Name: b.name, + From: from, + To: to, + Nodes: n, + tokens: c.tokens, + }}, true } diff --git a/context.go b/context.go index 10bdb3f..b9ea63e 100644 --- a/context.go +++ b/context.go @@ -145,8 +145,5 @@ func (c *context) finalize(root parser) error { } } - return ErrNotImplemented - - c.node.commit(c.tokens) return nil } diff --git a/parse.go b/parse.go index e646a68..958b505 100644 --- a/parse.go +++ b/parse.go @@ -73,19 +73,29 @@ func sequenceItemNames(items []SequenceItem) []string { return names } -func parse(t Trace, p parser, c *context) (*Node, error) { +func parse(t Trace, p parser, c *context) error { p.parse(t, c) if c.readErr != nil { - return nil, c.readErr + return c.readErr } if !c.match { - return nil, ErrInvalidInput + return ErrInvalidInput } if err := c.finalize(p); err != nil { - return nil, err + return err } - return c.node, nil + return nil +} + +func build(b builder, c *context) *Node { + c.offset = 0 + n, ok := b.build(c) + if !ok || len(n) != 1 { + panic("damaged parse result") + } + + return n[0] } diff --git a/sequence.go b/sequence.go index 4daf082..4473644 100644 --- a/sequence.go +++ b/sequence.go @@ -6,6 +6,8 @@ type sequenceDefinition struct { commit CommitType items []SequenceItem includedBy []int + ranges [][]int + sbuilder *sequenceBuilder } type sequenceParser struct { @@ -18,9 +20,11 @@ type sequenceParser struct { } type sequenceBuilder struct { - name string - id int + name string + id int commit CommitType + items []builder + ranges [][]int } func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefinition { @@ -31,9 +35,9 @@ func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefi } } -func (d *sequenceDefinition) nodeName() string { return d.name } -func (d *sequenceDefinition) nodeID() int { return d.id } -func (d *sequenceDefinition) setID(id int) { d.id = id } +func (d *sequenceDefinition) nodeName() string { return d.name } +func (d *sequenceDefinition) nodeID() int { return d.id } +func (d *sequenceDefinition) setID(id int) { d.id = id } func (d *sequenceDefinition) commitType() CommitType { return d.commit } func (d *sequenceDefinition) includeItems() bool { @@ -41,14 +45,33 @@ func (d *sequenceDefinition) includeItems() bool { } func (d *sequenceDefinition) init(r *registry) error { + if d.sbuilder == nil { + d.sbuilder = &sequenceBuilder{ + name: d.name, + id: d.id, + commit: d.commit, + } + } + for _, item := range d.items { if item.Min == 0 && item.Max == 0 { item.Min, item.Max = 1, 1 } else if item.Max == 0 { item.Max = -1 } + + d.ranges = append(d.ranges, []int{item.Min, item.Max}) + + def, ok := r.definition(item.Name) + if !ok { + return parserNotFound(item.Name) + } + + d.sbuilder.items = append(d.sbuilder.items, def.builder()) } + d.sbuilder.ranges = d.ranges + if !d.includeItems() { return nil } @@ -92,24 +115,13 @@ func (d *sequenceDefinition) parser(r *registry, parsers *idSet) (parser, error) r.setParser(sp) - var ( - items []parser - ranges [][]int - ) - + var items []parser parsers.set(d.id) defer parsers.unset(d.id) for _, item := range d.items { - if item.Min == 0 && item.Max == 0 { - item.Min, item.Max = 1, 1 - } else if item.Max == 0 { - item.Max = -1 - } - pi, ok := r.parser(item.Name) if ok { items = append(items, pi) - ranges = append(ranges, []int{item.Min, item.Max}) continue } @@ -124,16 +136,23 @@ func (d *sequenceDefinition) parser(r *registry, parsers *idSet) (parser, error) } items = append(items, pi) - ranges = append(ranges, []int{item.Min, item.Max}) } sp.items = items - sp.ranges = ranges + sp.ranges = d.ranges return sp, nil } func (d *sequenceDefinition) builder() builder { - return &sequenceBuilder{} + if d.sbuilder == nil { + d.sbuilder = &sequenceBuilder{ + name: d.name, + id: d.id, + commit: d.commit, + } + } + + return d.sbuilder } func (p *sequenceParser) nodeName() string { return p.name } @@ -162,6 +181,7 @@ func (p *sequenceParser) parse(t Trace, c *context) { to := c.offset for itemIndex < len(p.items) { + // TODO: is it ok to parse before max range check? what if max=0 p.items[itemIndex].parse(t, c) if !c.match { if currentCount < p.ranges[itemIndex][0] { @@ -201,6 +221,53 @@ func (p *sequenceParser) parse(t Trace, c *context) { func (b *sequenceBuilder) nodeName() string { return b.name } func (b *sequenceBuilder) nodeID() int { return b.id } -func (b *sequenceBuilder) build(*context) ([]*Node, bool) { - return nil, false +func (b *sequenceBuilder) build(c *context) ([]*Node, bool) { + to, ok := c.store.takeMatch(c.offset, b.id) + if !ok { + return nil, false + } + + from := c.offset + var ( + itemIndex int + currentCount int + nodes []*Node + ) + + for itemIndex < len(b.items) { + itemFrom := c.offset + n, ok := b.items[itemIndex].build(c) + if !ok { + if currentCount < b.ranges[itemIndex][0] { + panic("damaged parse result") + } + + itemIndex++ + currentCount = 0 + continue + } + + parsed := c.offset > itemFrom + if parsed { + nodes = append(nodes, n...) + currentCount++ + } + + if !parsed || b.ranges[itemIndex][1] >= 0 && currentCount == b.ranges[itemIndex][1] { + itemIndex++ + currentCount = 0 + } + } + + if b.commit&Alias != 0 { + return nodes, true + } + + return []*Node{{ + Name: b.name, + From: from, + To: to, + Nodes: nodes, + tokens: c.tokens, + }}, true } diff --git a/store.go b/store.go index dafeb67..9d53797 100644 --- a/store.go +++ b/store.go @@ -23,8 +23,8 @@ func (s *store) getMatch(offset, id int) (int, bool, bool) { } var ( - found bool - length int + found bool + to int ) for i := 0; i < len(s.match[offset]); i += 2 { @@ -33,12 +33,46 @@ func (s *store) getMatch(offset, id int) (int, bool, bool) { } found = true - if s.match[offset][i+1] > length { - length = s.match[offset][i+1] + if s.match[offset][i+1] > to { + to = s.match[offset][i+1] } } - return length, found, found + return to, found, found +} + +func (s *store) takeMatch(offset, id int) (int, bool) { + if s.hasNoMatch(offset, id) { + return 0, false + } + + if len(s.match) <= offset { + return 0, false + } + + var ( + found bool + to int + index int + ) + + for i := 0; i < len(s.match[offset]); i += 2 { + if s.match[offset][i] != id { + continue + } + + found = true + if s.match[offset][i+1] > to { + to = s.match[offset][i+1] + index = i + } + } + + if found { + s.match[offset][index] = -1 + } + + return to, found } func (s *store) ensureOffset(offset int) { diff --git a/syntax.go b/syntax.go index 2e2d6d8..d6275a6 100644 --- a/syntax.go +++ b/syntax.go @@ -29,6 +29,7 @@ type Syntax struct { explicitRoot bool root definition parser parser + builder builder } var ( @@ -154,6 +155,7 @@ func (s *Syntax) Init() error { return err } + s.builder = s.root.builder() s.initialized = true return nil } @@ -174,5 +176,9 @@ func (s *Syntax) Parse(r io.Reader) (*Node, error) { } c := newContext(bufio.NewReader(r)) - return parse(s.trace, s.parser, c) + if err := parse(s.trace, s.parser, c); err != nil { + return nil, err + } + + return build(s.builder, c), nil } From b1969e802f3102a2f37f1bcbd306a6b4bc8f7b2d Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Sat, 29 Jul 2017 20:04:22 +0200 Subject: [PATCH 11/15] remove included during build --- choice.go | 24 ++++++++++++++++++++---- json.parser | 12 ++++++------ mml_test.go | 2 +- sequence.go | 28 ++++++++++++++++++++++------ store.go | 34 ++++++++++++++++++++++++++++++++++ 5 files changed, 83 insertions(+), 17 deletions(-) diff --git a/choice.go b/choice.go index 07e4f88..e759f80 100644 --- a/choice.go +++ b/choice.go @@ -18,10 +18,11 @@ type choiceParser struct { } type choiceBuilder struct { - name string - id int - commit CommitType - elements []builder + name string + id int + commit CommitType + elements []builder + includedBy []int } func newChoice(name string, ct CommitType, elements []string) *choiceDefinition { @@ -66,6 +67,17 @@ func (d *choiceDefinition) setIncludedBy(r *registry, includedBy int, parsers *i } d.includedBy = appendIfMissing(d.includedBy, includedBy) + + if d.cbuilder == nil { + d.cbuilder = &choiceBuilder{ + name: d.name, + id: d.id, + commit: d.commit, + } + } + + d.cbuilder.includedBy = appendIfMissing(d.cbuilder.includedBy, includedBy) + parsers.set(d.id) return setItemsIncludedBy(r, d.elements, includedBy, parsers) } @@ -203,6 +215,10 @@ func (b *choiceBuilder) build(c *context) ([]*Node, bool) { return nil, false } + for _, ib := range b.includedBy { + c.store.takeMatchLength(c.offset, ib, to) + } + var element builder for _, e := range b.elements { elementTo, match, _ := c.store.getMatch(c.offset, e.nodeID()) diff --git a/json.parser b/json.parser index c7d40e8..de8c34d 100644 --- a/json.parser +++ b/json.parser @@ -1,12 +1,12 @@ // JSON (http://www.json.org) -ws:ws = [ \b\f\n\r\t]; +ws:alias = [ \b\f\n\r\t]; true = "true"; false = "false"; null = "null"; -string:nows = "\"" ([^\\"\b\f\n\r\t] | "\\" (["\\/bfnrt] | "u" [0-9a-f]{4}))* "\""; -number:nows = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? [0-9]+)?; -entry = string ":" value; -object = "{" (entry ("," entry)*)? "}"; -array = "[" (value ("," value)*)? "]"; +string = "\"" ([^\\"\b\f\n\r\t] | "\\" (["\\/bfnrt] | "u" [0-9a-f]{4}))* "\""; +number = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? [0-9]+)?; +entry = string ws* ":" ws* value; +object = "{" ws* (entry ws* ("," ws* entry)*)? ws* "}"; +array = "[" ws* (value ws* ("," ws* value)*)? ws* "]"; value:alias = true | false | null | string | number | object | array; json:root = value; diff --git a/mml_test.go b/mml_test.go index ac3cc16..db47e05 100644 --- a/mml_test.go +++ b/mml_test.go @@ -9,7 +9,7 @@ import ( ) func TestMML(t *testing.T) { - test(t, "mml.parser", "mml", []testItem{{ + testTrace(t, "mml.parser", "mml", 1, []testItem{{ msg: "empty", node: &Node{Name: "mml"}, }, { diff --git a/sequence.go b/sequence.go index 4473644..623327f 100644 --- a/sequence.go +++ b/sequence.go @@ -20,11 +20,12 @@ type sequenceParser struct { } type sequenceBuilder struct { - name string - id int - commit CommitType - items []builder - ranges [][]int + name string + id int + commit CommitType + items []builder + ranges [][]int + includedBy []int } func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefinition { @@ -87,6 +88,17 @@ func (d *sequenceDefinition) setIncludedBy(r *registry, includedBy int, parsers } d.includedBy = appendIfMissing(d.includedBy, includedBy) + + if d.sbuilder == nil { + d.sbuilder = &sequenceBuilder{ + name: d.name, + id: d.id, + commit: d.commit, + } + } + + d.sbuilder.includedBy = appendIfMissing(d.sbuilder.includedBy, includedBy) + if !d.includeItems() { return nil } @@ -227,6 +239,10 @@ func (b *sequenceBuilder) build(c *context) ([]*Node, bool) { return nil, false } + for _, ib := range b.includedBy { + c.store.takeMatchLength(c.offset, ib, to) + } + from := c.offset var ( itemIndex int @@ -239,7 +255,7 @@ func (b *sequenceBuilder) build(c *context) ([]*Node, bool) { n, ok := b.items[itemIndex].build(c) if !ok { if currentCount < b.ranges[itemIndex][0] { - panic("damaged parse result") + panic(b.name + ": damaged parse result") } itemIndex++ diff --git a/store.go b/store.go index 9d53797..e40b539 100644 --- a/store.go +++ b/store.go @@ -75,6 +75,40 @@ func (s *store) takeMatch(offset, id int) (int, bool) { return to, found } +func (s *store) takeMatchLength(offset, id, to int) (int, bool) { + if s.hasNoMatch(offset, id) { + return 0, false + } + + if len(s.match) <= offset { + return 0, false + } + + var ( + found bool + // index int + ) + + for i := 0; i < len(s.match[offset]); i += 2 { + if s.match[offset][i] != id { + continue + } + + found = true + if s.match[offset][i+1] == to { + s.match[offset][i] = -1 + return to, true + //eindex = i + } + } + + if found { + // s.match[offset][index] = -1 + } + + return to, found +} + func (s *store) ensureOffset(offset int) { if len(s.match) > offset { return From 3aa2d9a6b0d89672c29e2745114ac93171429b77 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Sat, 29 Jul 2017 22:31:16 +0200 Subject: [PATCH 12/15] enable tracing, disable storing included --- char.go | 5 +++++ choice.go | 15 +++++++++++++-- mml_test.go | 2 +- sequence.go | 11 ++++++++++- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/char.go b/char.go index 54e562c..b0c833a 100644 --- a/char.go +++ b/char.go @@ -69,11 +69,16 @@ func (p *charParser) match(t rune) bool { } func (p *charParser) parse(t Trace, c *context) { + t = t.Extend(p.name) + t.Out1("parsing", c.offset) + if tok, ok := c.token(); !ok || !p.match(tok) { + t.Out1("fail") c.fail(c.offset) return } + t.Out1("success") c.success(c.offset + 1) for _, includedBy := range p.includedBy { c.store.setMatch(c.offset, includedBy, c.offset+1) diff --git a/choice.go b/choice.go index e759f80..9c78e13 100644 --- a/choice.go +++ b/choice.go @@ -144,16 +144,22 @@ func (p *choiceParser) nodeName() string { return p.name } func (p *choiceParser) nodeID() int { return p.id } func (p *choiceParser) parse(t Trace, c *context) { + t = t.Extend(p.name) + t.Out1("parsing", c.offset) + + // TODO: don't add documentation if p.commit&Documentation != 0 { c.fail(c.offset) return } - if _, ok := c.fromStore(p.id); ok { + if m, ok := c.fromStore(p.id); ok { + t.Out1("found in cache", m) return } if c.excluded(c.offset, p.id) { + t.Out1("fail, excluded") c.fail(c.offset) return } @@ -186,7 +192,10 @@ func (p *choiceParser) parse(t Trace, c *context) { c.store.setMatch(from, p.id, to) for _, includedBy := range p.includedBy { - c.store.setMatch(from, includedBy, to) + if !c.excluded(from, includedBy) { + t.Out1("storing included", includedBy) + // c.store.setMatch(from, includedBy, to) + } } } @@ -198,9 +207,11 @@ func (p *choiceParser) parse(t Trace, c *context) { if match { c.success(to) c.include(from, p.id) + t.Out1("success") return } + t.Out1("fail") c.store.setNoMatch(from, p.id) c.fail(from) c.include(from, p.id) diff --git a/mml_test.go b/mml_test.go index fb519a5..0921296 100644 --- a/mml_test.go +++ b/mml_test.go @@ -9,7 +9,7 @@ import ( ) func TestMML(t *testing.T) { - testTrace(t, "mml.parser", "mml", 1, []testItem{{ + testTrace(t, "mml.parser", "mml", 0, []testItem{{ msg: "empty", node: &Node{Name: "mml"}, }, { diff --git a/sequence.go b/sequence.go index 623327f..a86cc3b 100644 --- a/sequence.go +++ b/sequence.go @@ -171,12 +171,16 @@ func (p *sequenceParser) nodeName() string { return p.name } func (p *sequenceParser) nodeID() int { return p.id } func (p *sequenceParser) parse(t Trace, c *context) { + t = t.Extend(p.name) + t.Out1("parsing", c.offset) + if p.commit&Documentation != 0 { c.fail(c.offset) return } if c.excluded(c.offset, p.id) { + t.Out1("fail, excluded") c.fail(c.offset) return } @@ -200,6 +204,7 @@ func (p *sequenceParser) parse(t Trace, c *context) { // c.store.setNoMatch(from, p.id) c.fail(from) c.include(from, p.id) + t.Out1("fail, not enough items") return } @@ -222,9 +227,13 @@ func (p *sequenceParser) parse(t Trace, c *context) { } for _, includedBy := range p.includedBy { - c.store.setMatch(from, includedBy, to) + if !c.excluded(from, includedBy) { + t.Out1("storing included", includedBy) + // c.store.setMatch(from, includedBy, to) + } } + t.Out1("success") c.store.setMatch(from, p.id, to) c.success(to) c.include(from, p.id) From c83887a87664509bf2ebcaa8b6e44fdf82408c57 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Sat, 29 Jul 2017 23:05:57 +0200 Subject: [PATCH 13/15] fix storing included --- choice.go | 4 ++-- sequence.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/choice.go b/choice.go index 9c78e13..c5634f2 100644 --- a/choice.go +++ b/choice.go @@ -192,9 +192,9 @@ func (p *choiceParser) parse(t Trace, c *context) { c.store.setMatch(from, p.id, to) for _, includedBy := range p.includedBy { - if !c.excluded(from, includedBy) { + if c.excluded(from, includedBy) { t.Out1("storing included", includedBy) - // c.store.setMatch(from, includedBy, to) + c.store.setMatch(from, includedBy, to) } } } diff --git a/sequence.go b/sequence.go index a86cc3b..e41d406 100644 --- a/sequence.go +++ b/sequence.go @@ -227,9 +227,9 @@ func (p *sequenceParser) parse(t Trace, c *context) { } for _, includedBy := range p.includedBy { - if !c.excluded(from, includedBy) { + if c.excluded(from, includedBy) { t.Out1("storing included", includedBy) - // c.store.setMatch(from, includedBy, to) + c.store.setMatch(from, includedBy, to) } } From aa498473e70de726e85c30c9efa8d31ef23bdc03 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Sat, 29 Jul 2017 23:16:03 +0200 Subject: [PATCH 14/15] add todos --- store.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/store.go b/store.go index cd0e3e0..37e53fc 100644 --- a/store.go +++ b/store.go @@ -1,5 +1,10 @@ package treerack +// TODO: +// - store it similarly to the excluded ones? sorted by offset? +// - use a helper field for the last accessed position to walk from there? for every offset? +// - use a helper field to store the largest value and its index, too? for an offset? + type store struct { noMatch []*idSet match [][]int From a592d1b4a5fdc4526ea12c013d28f944ecf93e8a Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Sat, 29 Jul 2017 23:26:08 +0200 Subject: [PATCH 15/15] disable tracing --- char.go | 8 ++++---- choice.go | 18 +++++++++--------- sequence.go | 14 ++++++++------ 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/char.go b/char.go index b0c833a..d09a066 100644 --- a/char.go +++ b/char.go @@ -69,16 +69,16 @@ func (p *charParser) match(t rune) bool { } func (p *charParser) parse(t Trace, c *context) { - t = t.Extend(p.name) - t.Out1("parsing", c.offset) + // t = t.Extend(p.name) + // t.Out1("parsing", c.offset) if tok, ok := c.token(); !ok || !p.match(tok) { - t.Out1("fail") + // t.Out1("fail") c.fail(c.offset) return } - t.Out1("success") + // t.Out1("success") c.success(c.offset + 1) for _, includedBy := range p.includedBy { c.store.setMatch(c.offset, includedBy, c.offset+1) diff --git a/choice.go b/choice.go index bf77cc9..4b7193f 100644 --- a/choice.go +++ b/choice.go @@ -144,23 +144,23 @@ func (p *choiceParser) nodeName() string { return p.name } func (p *choiceParser) nodeID() int { return p.id } func (p *choiceParser) parse(t Trace, c *context) { - t = t.Extend(p.name) - t.Out1("parsing choice", c.offset) + // t = t.Extend(p.name) + // t.Out1("parsing choice", c.offset) // TODO: don't add documentation if p.commit&Documentation != 0 { - t.Out1("fail, doc") + // t.Out1("fail, doc") c.fail(c.offset) return } - if m, ok := c.fromStore(p.id); ok { - t.Out1("found in store, match:", m) + if _, ok := c.fromStore(p.id); ok { + // t.Out1("found in store, match:", m) return } if c.excluded(c.offset, p.id) { - t.Out1("fail, excluded") + // t.Out1("fail, excluded") c.fail(c.offset) return } @@ -194,7 +194,7 @@ func (p *choiceParser) parse(t Trace, c *context) { c.store.setMatch(from, p.id, to) for _, includedBy := range p.includedBy { if c.excluded(from, includedBy) { - t.Out1("storing included", includedBy) + // t.Out1("storing included", includedBy) c.store.setMatch(from, includedBy, to) } } @@ -208,11 +208,11 @@ func (p *choiceParser) parse(t Trace, c *context) { if match { c.success(to) c.include(from, p.id) - t.Out1("choice, success") + // t.Out1("choice, success") return } - t.Out1("fail") + // t.Out1("fail") c.store.setNoMatch(from, p.id) c.fail(from) c.include(from, p.id) diff --git a/sequence.go b/sequence.go index b952dc0..d69d098 100644 --- a/sequence.go +++ b/sequence.go @@ -171,15 +171,17 @@ func (p *sequenceParser) nodeName() string { return p.name } func (p *sequenceParser) nodeID() int { return p.id } func (p *sequenceParser) parse(t Trace, c *context) { - t = t.Extend(p.name) + // t = t.Extend(p.name) + // t.Out1("parsing sequence") + if p.commit&Documentation != 0 { - t.Out1("fail, doc") + // t.Out1("fail, doc") c.fail(c.offset) return } if c.excluded(c.offset, p.id) { - t.Out1("fail, excluded") + // t.Out1("fail, excluded") c.fail(c.offset) return } @@ -203,7 +205,7 @@ func (p *sequenceParser) parse(t Trace, c *context) { // c.store.setNoMatch(from, p.id) c.fail(from) c.include(from, p.id) - t.Out1("fail, not enough items") + // t.Out1("fail, not enough items") return } @@ -227,12 +229,12 @@ func (p *sequenceParser) parse(t Trace, c *context) { for _, includedBy := range p.includedBy { if c.excluded(from, includedBy) { - t.Out1("storing included", includedBy) + // t.Out1("storing included", includedBy) c.store.setMatch(from, includedBy, to) } } - t.Out1("success") + // t.Out1("success") c.store.setMatch(from, p.id, to) c.success(to) c.include(from, p.id)