diff --git a/buzz.txt b/buzz.txt index 300475b..4e20506 100644 --- a/buzz.txt +++ b/buzz.txt @@ -1,5 +1,9 @@ generator, in-process init or command line -syntax from file or in-memory +syntax from text or defined during runtime, or combined simple syntax with recursion no lexer required utf8, 8bit or custom tokens +abstract syntax tree from text of arbitrary syntax +reading from streams +context free, however support for custom tokens in the input +custom tokens for indentation built in diff --git a/char.go b/char.go index 5bc0df8..e5dcbb9 100644 --- a/char.go +++ b/char.go @@ -52,7 +52,7 @@ func (p *charParser) setIncludedBy(includedBy parser, path []string) { p.includedBy = append(p.includedBy, includedBy) } -func (p *charParser) cacheIncluded(*context, *Node) { +func (p *charParser) storeIncluded(*context, *Node) { panic(cannotIncludeParsers(p.name)) } @@ -82,24 +82,24 @@ func (p *charParser) parse(t Trace, c *context) { return } - if _, ok := c.fromCache(p.name); ok { - // t.Out1("found in cache, match:", m) + if _, ok := c.fromStore(p.name); ok { + // t.Out1("found in store, match:", m) return } if tok, ok := c.token(); ok && p.match(tok) { // t.Out1("success", string(tok)) n := newNode(p.name, c.offset, c.offset+1, p.commit) - c.cache.set(c.offset, p.name, n) + c.store.set(c.offset, p.name, n) for _, includedBy := range p.includedBy { - includedBy.cacheIncluded(c, n) + includedBy.storeIncluded(c, n) } c.success(n) return } else { // t.Out1("fail", string(tok)) - c.cache.set(c.offset, p.name, nil) + c.store.set(c.offset, p.name, nil) c.fail(c.offset) return } diff --git a/choice.go b/choice.go index 3304037..4b49cee 100644 --- a/choice.go +++ b/choice.go @@ -78,17 +78,17 @@ func (p *choiceParser) setIncludedBy(includedBy parser, path []string) { p.includedBy = append(p.includedBy, includedBy) } -func (p *choiceParser) cacheIncluded(c *context, n *Node) { +func (p *choiceParser) storeIncluded(c *context, n *Node) { if !c.excluded(n.From, p.name) { return } nc := newNode(p.name, n.From, n.To, p.commit) nc.append(n) - c.cache.set(nc.From, p.name, nc) + c.store.set(nc.From, p.name, nc) for _, includedBy := range p.includedBy { - includedBy.cacheIncluded(c, nc) + includedBy.storeIncluded(c, nc) } } @@ -102,8 +102,8 @@ func (p *choiceParser) parse(t Trace, c *context) { return } - if _, ok := c.fromCache(p.name); ok { - // t.Out1("found in cache, match:", m) + if _, ok := c.fromStore(p.name); ok { + // t.Out1("found in store, match:", m) return } @@ -137,9 +137,9 @@ func (p *choiceParser) parse(t Trace, c *context) { node = newNode(p.name, c.offset, c.offset, p.commit) node.append(c.node) - c.cache.set(node.From, p.name, node) + c.store.set(node.From, p.name, node) for _, includedBy := range p.includedBy { - includedBy.cacheIncluded(c, node) + includedBy.storeIncluded(c, node) } } @@ -155,6 +155,6 @@ func (p *choiceParser) parse(t Trace, c *context) { } // t.Out1("fail") - c.cache.set(node.From, p.name, nil) + c.store.set(node.From, p.name, nil) c.fail(node.From) } diff --git a/context.go b/context.go index 46b05cf..30e80da 100644 --- a/context.go +++ b/context.go @@ -11,7 +11,7 @@ type context struct { readOffset int readErr error eof bool - cache *cache + store *store tokens []rune match bool node *Node @@ -21,7 +21,7 @@ type context struct { func newContext(r io.RuneReader) *context { return &context{ reader: r, - cache: &cache{}, + store: &store{}, } } @@ -46,7 +46,7 @@ func (c *context) read() bool { c.readOffset++ if t == unicode.ReplacementChar { - c.readErr = ErrInvalidCharacter + c.readErr = ErrInvalidUnicodeCharacter return false } @@ -105,8 +105,8 @@ func (c *context) include(offset int, name string) { } } -func (c *context) fromCache(name string) (bool, bool) { - n, m, ok := c.cache.get(c.offset, name) +func (c *context) fromStore(name string) (bool, bool) { + n, m, ok := c.store.get(c.offset, name) if !ok { return false, false } diff --git a/define.go b/define.go index c382788..b53e8e5 100644 --- a/define.go +++ b/define.go @@ -48,14 +48,14 @@ func unescape(escape rune, banned []rune, chars []rune) ([]rune, error) { case ci == escape: escaped = true case runesContain(banned, ci): - return nil, ErrInvalidCharacter + return nil, ErrInvalidEscapeCharacter default: unescaped = append(unescaped, ci) } } if escaped { - return nil, ErrInvalidCharacter + return nil, ErrInvalidEscapeCharacter } return unescaped, nil diff --git a/json.parser b/json.parser index cef6674..ade4df9 100644 --- a/json.parser +++ b/json.parser @@ -10,5 +10,3 @@ object = "{" ws* (entry (ws* "," ws* entry)*)? ws* "}"; array = "[" ws* (value (ws* "," ws* value)*)? ws* "]"; value:alias = true | false | null | string | number | object | array; json = value; - -// TODO: value should be an alias but test it first like this diff --git a/keyval.parser b/keyval.parser index 4eff543..6422e24 100644 --- a/keyval.parser +++ b/keyval.parser @@ -22,8 +22,6 @@ key-val = (comment "\n" ws*)? (key | key? ws* "=" ws* value?); entry:alias = group-key | key-val; doc:root = (entry (ws* comment-line)? | wsnlc)*; -// TODO: not tested -// set as root for streaming: single-entry = (entry (ws* comment-line)? | wsnlc* entry (ws* comment-line)?) []; diff --git a/mml.parser b/mml.parser index f051784..abc0137 100644 --- a/mml.parser +++ b/mml.parser @@ -101,7 +101,6 @@ channel = "<>" | "<" wsnlc* int wsnlc* ">"; and-expression:doc = "and" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")"; or-expression:doc = "or" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")"; -// TODO: use collect argument-list:alias = static-symbol (list-sep static-symbol)*; collect-symbol = "..." wsnlc* static-symbol; function-fact:alias = "(" (wsnlc | ",")* @@ -128,7 +127,7 @@ range-to = expression; range-expression:alias = range-from? wsnlc* ":" wsnlc* range-to?; indexer-expression:alias = expression | range-expression; expression-indexer:alias = primary-expression wsc* "[" wsnlc* indexer-expression wsnlc* "]"; -symbol-indexer:alias = primary-expression wsnlc* "." wsnlc* symbol-expression; // TODO: test with a float on a new line +symbol-indexer:alias = primary-expression wsnlc* "." wsnlc* symbol-expression; indexer = expression-indexer | symbol-indexer; function-application = primary-expression wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")"; @@ -140,11 +139,10 @@ if = "if" wsnlc* expression wsnlc* block default = "default" wsnlc* ":"; default-line:alias = default (wsnlc | ";")* statement?; case = "case" wsnlc* expression wsnlc* ":"; -case-line:alias = case (wsnlc | ";")* statement?; +case-line:alias = case (wsc | ";")* statement?; switch = "switch" wsnlc* expression? wsnlc* "{" (wsnlc | ";")* ((case-line | default-line) (sep (case-line | default-line | statement))*)? (wsnlc | ";")* "}"; -// TODO: empty case not handled int-type = "int"; float-type = "float"; @@ -252,7 +250,7 @@ match-set:alias = type-set | match-fact; match-expression:alias = match-set | static-symbol wsc* match-set; match-case = "case" wsnlc* match-expression wsnlc* ":"; -match-case-line:alias = match-case (wsnlc | ";")* statement?; +match-case-line:alias = match-case (wsc | ";")* statement?; match = "match" wsnlc* expression wsnlc* "{" (wsnlc | ";")* ((match-case-line | default-line) (sep (match-case-line | default-line | statement))*)? @@ -286,7 +284,7 @@ communication-group:alias = "(" wsnlc* communication wsnlc* ")"; communication:alias = receive-expression | receive-statement | send | communication-group; select-case = "case" wsnlc* communication wsnlc* ":"; -select-case-line:alias = select-case (wsnlc | ";")* statement?; +select-case-line:alias = select-case (wsc | ";")* statement?; select = "select" wsnlc* "{" (wsnlc | ";")* ((select-case-line | default-line) (sep (select-case-line | default-line | statement))*)? @@ -497,10 +495,6 @@ definition:alias = value-definition | function-definition-group | effect-definition-group; -// TODO: cannot do: -// type alias a int|fn () string|error -// needs grouping of type-set - type-alias = "type" wsnlc* "alias" wsnlc* static-symbol wsnlc* type-set; type-constraint = "type" wsnlc* static-symbol wsnlc* type-set; diff --git a/mml_test.go b/mml_test.go index 3435dd6..1b9d0ee 100644 --- a/mml_test.go +++ b/mml_test.go @@ -1028,6 +1028,18 @@ func TestMML(t *testing.T) { To: 7, }}, }}, + }, { + msg: "float on a new line", + text: "f()\n.9", + nodes: []*Node{{ + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "float", + }}, + ignorePosition: true, }, { msg: "function application", text: "f()", @@ -1331,6 +1343,38 @@ func TestMML(t *testing.T) { To: 16, }}, }}, + }, { + msg: "switch, empty cases", + text: ` + switch { + case a: + case b: + default: + f() + } + `, + nodes: []*Node{{ + Name: "switch", + Nodes: []*Node{{ + Name: "case", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "case", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "default", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, }, { msg: "switch, single case", text: "switch a {case b: c}", diff --git a/notes.txt b/notes.txt index 456a631..b669134 100644 --- a/notes.txt +++ b/notes.txt @@ -2,6 +2,7 @@ cleanup error reporting custom tokens indentation +streaming benchmarking code generation go code generation js diff --git a/parse.go b/parse.go index c091834..8140e99 100644 --- a/parse.go +++ b/parse.go @@ -11,7 +11,7 @@ type definition interface { type parser interface { nodeName() string setIncludedBy(parser, []string) - cacheIncluded(*context, *Node) + storeIncluded(*context, *Node) parse(Trace, *context) } diff --git a/scheme.parser b/scheme.parser index 28cf423..67bfd7f 100644 --- a/scheme.parser +++ b/scheme.parser @@ -1,5 +1,3 @@ -// TODO: comment - ws:alias = [ \b\f\n\r\t\v]; comment:alias = ";" [^\n]*; wsc:alias = ws | comment; diff --git a/sequence.go b/sequence.go index be791cd..f334f5a 100644 --- a/sequence.go +++ b/sequence.go @@ -99,17 +99,17 @@ func (p *sequenceParser) setIncludedBy(includedBy parser, path []string) { p.includedBy = append(p.includedBy, includedBy) } -func (p *sequenceParser) cacheIncluded(c *context, n *Node) { +func (p *sequenceParser) storeIncluded(c *context, n *Node) { if !c.excluded(n.From, p.name) { return } nc := newNode(p.name, n.From, n.To, p.commit) nc.append(n) - c.cache.set(nc.From, p.name, nc) + c.store.set(nc.From, p.name, nc) for _, includedBy := range p.includedBy { - includedBy.cacheIncluded(c, nc) + includedBy.storeIncluded(c, nc) } } @@ -138,9 +138,9 @@ func (p *sequenceParser) parse(t Trace, c *context) { node := newNode(p.name, c.offset, c.offset, p.commit) for len(items) > 0 { - m, ok := c.fromCache(items[0].nodeName()) + m, ok := c.fromStore(items[0].nodeName()) if ok { - // t.Out1("sequence item found in cache, match:", m, items[0].nodeName(), c.offset) + // t.Out1("sequence item found in store, match:", m, items[0].nodeName(), c.offset) } else { items[0].parse(t, c) m = c.match @@ -149,7 +149,7 @@ func (p *sequenceParser) parse(t Trace, c *context) { if !m { if currentCount < ranges[0][0] { // t.Out1("fail, item failed") - c.cache.set(node.From, p.name, nil) + c.store.set(node.From, p.name, nil) c.fail(node.From) return } @@ -174,9 +174,9 @@ func (p *sequenceParser) parse(t Trace, c *context) { // t.Out1("success, items parsed") - c.cache.set(node.From, p.name, node) + c.store.set(node.From, p.name, node) for _, includedBy := range p.includedBy { - includedBy.cacheIncluded(c, node) + includedBy.storeIncluded(c, node) } c.success(node) diff --git a/cache.go b/store.go similarity index 70% rename from cache.go rename to store.go index 49d6dc6..473a2b4 100644 --- a/cache.go +++ b/store.go @@ -1,22 +1,20 @@ package parse -// TODO: rename to store - -type cacheItem struct { +type storedItem struct { name string node *Node } -type tokenCache struct { - match []*cacheItem // TODO: potential optimization can be to use a balanced binary tree +type storeEntry struct { + match []*storedItem noMatch []string } -type cache struct { - tokens []*tokenCache // TODO: try with pointers, too +type store struct { + tokens []*storeEntry } -func (c *cache) get(offset int, name string) (*Node, bool, bool) { +func (c *store) get(offset int, name string) (*Node, bool, bool) { if len(c.tokens) <= offset { return nil, false, false } @@ -41,8 +39,8 @@ func (c *cache) get(offset int, name string) (*Node, bool, bool) { return nil, false, false } -func (c *cache) set(offset int, name string, n *Node) { - var tc *tokenCache +func (c *store) set(offset int, name string, n *Node) { + var tc *storeEntry if len(c.tokens) > offset { tc = c.tokens[offset] } else { @@ -55,7 +53,7 @@ func (c *cache) set(offset int, name string, n *Node) { } } - tc = &tokenCache{} + tc = &storeEntry{} c.tokens[offset] = tc } @@ -86,7 +84,7 @@ func (c *cache) set(offset int, name string, n *Node) { } } - tc.match = append(tc.match, &cacheItem{ + tc.match = append(tc.match, &storedItem{ name: name, node: n, }) diff --git a/syntax.go b/syntax.go index 08d5e55..6216f8e 100644 --- a/syntax.go +++ b/syntax.go @@ -36,7 +36,8 @@ var ( ErrInitFailed = errors.New("init failed") ErrNoParsersDefined = errors.New("no parsers defined") ErrInvalidInput = errors.New("invalid input") - ErrInvalidCharacter = errors.New("invalid character") // TODO: fix two use cases, utf8 and boot + ErrInvalidUnicodeCharacter = errors.New("invalid unicode character") + ErrInvalidEscapeCharacter = errors.New("invalid escape character") ErrUnexpectedCharacter = errors.New("unexpected character") ErrInvalidSyntax = errors.New("invalid syntax") ErrRootAlias = errors.New("root node cannot be an alias") diff --git a/syntax.parser b/syntax.parser index d5d454f..5bb8d8b 100644 --- a/syntax.parser +++ b/syntax.parser @@ -9,25 +9,16 @@ comment = comment-segment (ws-no-nl* "\n"? ws-no-nl* comment-segme any-char = "."; // equivalent to [^] -// TODO: document matching terminal: [] - -// TODO: handle char class equivalences - -// TODO: enable streaming - -// TODO: set route function in generated code? - // caution: newline is accepted class-not = "^"; class-char = [^\\\[\]\^\-] | "\\" .; char-range = class-char "-" class-char; char-class = "[" class-not? (class-char | char-range)* "]"; -// caution: newline is accepted +// newline is accepted sequence-char = [^\\"] | "\\" .; char-sequence = "\"" sequence-char* "\""; -// TODO: this can be mixed up with sequence. Is it fine? fix this, see mml symbol terminal:alias = any-char | char-class | char-sequence; symbol = [^\\ \n\t\b\f\r\v/.\[\]\"{}\^+*?|():=;]+; @@ -50,11 +41,11 @@ quantity:alias = count-quantifier | zero-or-one; item = (terminal | symbol | group) quantity?; -sequence = item (wsc* item)*; // TODO: why was this '+'? +sequence = item (wsc* item)*; element:alias = terminal | symbol | group | sequence; -// DOC: once cached, doesn't try again, even in a new context, therefore the order may matter +// DOC: how the order matters choice = element (wsc* "|" wsc* element)+; // DOC: not having 'not' needs some tricks sometimes