generic cleanup

This commit is contained in:
Arpad Ryszka 2017-06-26 02:20:23 +02:00
parent 5d38caf222
commit 1e83b66ba5
16 changed files with 99 additions and 72 deletions

View File

@ -1,5 +1,9 @@
generator, in-process init or command line generator, in-process init or command line
syntax from file or in-memory syntax from text or defined during runtime, or combined
simple syntax with recursion simple syntax with recursion
no lexer required no lexer required
utf8, 8bit or custom tokens utf8, 8bit or custom tokens
abstract syntax tree from text of arbitrary syntax
reading from streams
context-free, but with support for custom tokens in the input
custom tokens for indentation built in

12
char.go
View File

@ -52,7 +52,7 @@ func (p *charParser) setIncludedBy(includedBy parser, path []string) {
p.includedBy = append(p.includedBy, includedBy) p.includedBy = append(p.includedBy, includedBy)
} }
func (p *charParser) cacheIncluded(*context, *Node) { func (p *charParser) storeIncluded(*context, *Node) {
panic(cannotIncludeParsers(p.name)) panic(cannotIncludeParsers(p.name))
} }
@ -82,24 +82,24 @@ func (p *charParser) parse(t Trace, c *context) {
return return
} }
if _, ok := c.fromCache(p.name); ok { if _, ok := c.fromStore(p.name); ok {
// t.Out1("found in cache, match:", m) // t.Out1("found in store, match:", m)
return return
} }
if tok, ok := c.token(); ok && p.match(tok) { if tok, ok := c.token(); ok && p.match(tok) {
// t.Out1("success", string(tok)) // t.Out1("success", string(tok))
n := newNode(p.name, c.offset, c.offset+1, p.commit) n := newNode(p.name, c.offset, c.offset+1, p.commit)
c.cache.set(c.offset, p.name, n) c.store.set(c.offset, p.name, n)
for _, includedBy := range p.includedBy { for _, includedBy := range p.includedBy {
includedBy.cacheIncluded(c, n) includedBy.storeIncluded(c, n)
} }
c.success(n) c.success(n)
return return
} else { } else {
// t.Out1("fail", string(tok)) // t.Out1("fail", string(tok))
c.cache.set(c.offset, p.name, nil) c.store.set(c.offset, p.name, nil)
c.fail(c.offset) c.fail(c.offset)
return return
} }

View File

@ -78,17 +78,17 @@ func (p *choiceParser) setIncludedBy(includedBy parser, path []string) {
p.includedBy = append(p.includedBy, includedBy) p.includedBy = append(p.includedBy, includedBy)
} }
func (p *choiceParser) cacheIncluded(c *context, n *Node) { func (p *choiceParser) storeIncluded(c *context, n *Node) {
if !c.excluded(n.From, p.name) { if !c.excluded(n.From, p.name) {
return return
} }
nc := newNode(p.name, n.From, n.To, p.commit) nc := newNode(p.name, n.From, n.To, p.commit)
nc.append(n) nc.append(n)
c.cache.set(nc.From, p.name, nc) c.store.set(nc.From, p.name, nc)
for _, includedBy := range p.includedBy { for _, includedBy := range p.includedBy {
includedBy.cacheIncluded(c, nc) includedBy.storeIncluded(c, nc)
} }
} }
@ -102,8 +102,8 @@ func (p *choiceParser) parse(t Trace, c *context) {
return return
} }
if _, ok := c.fromCache(p.name); ok { if _, ok := c.fromStore(p.name); ok {
// t.Out1("found in cache, match:", m) // t.Out1("found in store, match:", m)
return return
} }
@ -137,9 +137,9 @@ func (p *choiceParser) parse(t Trace, c *context) {
node = newNode(p.name, c.offset, c.offset, p.commit) node = newNode(p.name, c.offset, c.offset, p.commit)
node.append(c.node) node.append(c.node)
c.cache.set(node.From, p.name, node) c.store.set(node.From, p.name, node)
for _, includedBy := range p.includedBy { for _, includedBy := range p.includedBy {
includedBy.cacheIncluded(c, node) includedBy.storeIncluded(c, node)
} }
} }
@ -155,6 +155,6 @@ func (p *choiceParser) parse(t Trace, c *context) {
} }
// t.Out1("fail") // t.Out1("fail")
c.cache.set(node.From, p.name, nil) c.store.set(node.From, p.name, nil)
c.fail(node.From) c.fail(node.From)
} }

View File

@ -11,7 +11,7 @@ type context struct {
readOffset int readOffset int
readErr error readErr error
eof bool eof bool
cache *cache store *store
tokens []rune tokens []rune
match bool match bool
node *Node node *Node
@ -21,7 +21,7 @@ type context struct {
func newContext(r io.RuneReader) *context { func newContext(r io.RuneReader) *context {
return &context{ return &context{
reader: r, reader: r,
cache: &cache{}, store: &store{},
} }
} }
@ -46,7 +46,7 @@ func (c *context) read() bool {
c.readOffset++ c.readOffset++
if t == unicode.ReplacementChar { if t == unicode.ReplacementChar {
c.readErr = ErrInvalidCharacter c.readErr = ErrInvalidUnicodeCharacter
return false return false
} }
@ -105,8 +105,8 @@ func (c *context) include(offset int, name string) {
} }
} }
func (c *context) fromCache(name string) (bool, bool) { func (c *context) fromStore(name string) (bool, bool) {
n, m, ok := c.cache.get(c.offset, name) n, m, ok := c.store.get(c.offset, name)
if !ok { if !ok {
return false, false return false, false
} }

View File

@ -48,14 +48,14 @@ func unescape(escape rune, banned []rune, chars []rune) ([]rune, error) {
case ci == escape: case ci == escape:
escaped = true escaped = true
case runesContain(banned, ci): case runesContain(banned, ci):
return nil, ErrInvalidCharacter return nil, ErrInvalidEscapeCharacter
default: default:
unescaped = append(unescaped, ci) unescaped = append(unescaped, ci)
} }
} }
if escaped { if escaped {
return nil, ErrInvalidCharacter return nil, ErrInvalidEscapeCharacter
} }
return unescaped, nil return unescaped, nil

View File

@ -10,5 +10,3 @@ object = "{" ws* (entry (ws* "," ws* entry)*)? ws* "}";
array = "[" ws* (value (ws* "," ws* value)*)? ws* "]"; array = "[" ws* (value (ws* "," ws* value)*)? ws* "]";
value:alias = true | false | null | string | number | object | array; value:alias = true | false | null | string | number | object | array;
json = value; json = value;
// TODO: value should be an alias but test it first like this

View File

@ -22,8 +22,6 @@ key-val = (comment "\n" ws*)? (key | key? ws* "=" ws* value?);
entry:alias = group-key | key-val; entry:alias = group-key | key-val;
doc:root = (entry (ws* comment-line)? | wsnlc)*; doc:root = (entry (ws* comment-line)? | wsnlc)*;
// TODO: not tested
// set as root for streaming:
single-entry = (entry (ws* comment-line)? single-entry = (entry (ws* comment-line)?
| wsnlc* entry (ws* comment-line)?) | wsnlc* entry (ws* comment-line)?)
[]; [];

View File

@ -101,7 +101,6 @@ channel = "<>" | "<" wsnlc* int wsnlc* ">";
and-expression:doc = "and" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")"; and-expression:doc = "and" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")";
or-expression:doc = "or" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")"; or-expression:doc = "or" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")";
// TODO: use collect
argument-list:alias = static-symbol (list-sep static-symbol)*; argument-list:alias = static-symbol (list-sep static-symbol)*;
collect-symbol = "..." wsnlc* static-symbol; collect-symbol = "..." wsnlc* static-symbol;
function-fact:alias = "(" (wsnlc | ",")* function-fact:alias = "(" (wsnlc | ",")*
@ -128,7 +127,7 @@ range-to = expression;
range-expression:alias = range-from? wsnlc* ":" wsnlc* range-to?; range-expression:alias = range-from? wsnlc* ":" wsnlc* range-to?;
indexer-expression:alias = expression | range-expression; indexer-expression:alias = expression | range-expression;
expression-indexer:alias = primary-expression wsc* "[" wsnlc* indexer-expression wsnlc* "]"; expression-indexer:alias = primary-expression wsc* "[" wsnlc* indexer-expression wsnlc* "]";
symbol-indexer:alias = primary-expression wsnlc* "." wsnlc* symbol-expression; // TODO: test with a float on a new line symbol-indexer:alias = primary-expression wsnlc* "." wsnlc* symbol-expression;
indexer = expression-indexer | symbol-indexer; indexer = expression-indexer | symbol-indexer;
function-application = primary-expression wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")"; function-application = primary-expression wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")";
@ -140,11 +139,10 @@ if = "if" wsnlc* expression wsnlc* block
default = "default" wsnlc* ":"; default = "default" wsnlc* ":";
default-line:alias = default (wsnlc | ";")* statement?; default-line:alias = default (wsnlc | ";")* statement?;
case = "case" wsnlc* expression wsnlc* ":"; case = "case" wsnlc* expression wsnlc* ":";
case-line:alias = case (wsnlc | ";")* statement?; case-line:alias = case (wsc | ";")* statement?;
switch = "switch" wsnlc* expression? wsnlc* "{" (wsnlc | ";")* switch = "switch" wsnlc* expression? wsnlc* "{" (wsnlc | ";")*
((case-line | default-line) (sep (case-line | default-line | statement))*)? ((case-line | default-line) (sep (case-line | default-line | statement))*)?
(wsnlc | ";")* "}"; (wsnlc | ";")* "}";
// TODO: empty case not handled
int-type = "int"; int-type = "int";
float-type = "float"; float-type = "float";
@ -252,7 +250,7 @@ match-set:alias = type-set | match-fact;
match-expression:alias = match-set | static-symbol wsc* match-set; match-expression:alias = match-set | static-symbol wsc* match-set;
match-case = "case" wsnlc* match-expression wsnlc* ":"; match-case = "case" wsnlc* match-expression wsnlc* ":";
match-case-line:alias = match-case (wsnlc | ";")* statement?; match-case-line:alias = match-case (wsc | ";")* statement?;
match = "match" wsnlc* expression wsnlc* "{" (wsnlc | ";")* match = "match" wsnlc* expression wsnlc* "{" (wsnlc | ";")*
((match-case-line | default-line) ((match-case-line | default-line)
(sep (match-case-line | default-line | statement))*)? (sep (match-case-line | default-line | statement))*)?
@ -286,7 +284,7 @@ communication-group:alias = "(" wsnlc* communication wsnlc* ")";
communication:alias = receive-expression | receive-statement | send | communication-group; communication:alias = receive-expression | receive-statement | send | communication-group;
select-case = "case" wsnlc* communication wsnlc* ":"; select-case = "case" wsnlc* communication wsnlc* ":";
select-case-line:alias = select-case (wsnlc | ";")* statement?; select-case-line:alias = select-case (wsc | ";")* statement?;
select = "select" wsnlc* "{" (wsnlc | ";")* select = "select" wsnlc* "{" (wsnlc | ";")*
((select-case-line | default-line) ((select-case-line | default-line)
(sep (select-case-line | default-line | statement))*)? (sep (select-case-line | default-line | statement))*)?
@ -497,10 +495,6 @@ definition:alias = value-definition
| function-definition-group | function-definition-group
| effect-definition-group; | effect-definition-group;
// TODO: cannot do:
// type alias a int|fn () string|error
// needs grouping of type-set
type-alias = "type" wsnlc* "alias" wsnlc* static-symbol wsnlc* type-set; type-alias = "type" wsnlc* "alias" wsnlc* static-symbol wsnlc* type-set;
type-constraint = "type" wsnlc* static-symbol wsnlc* type-set; type-constraint = "type" wsnlc* static-symbol wsnlc* type-set;

View File

@ -1028,6 +1028,18 @@ func TestMML(t *testing.T) {
To: 7, To: 7,
}}, }},
}}, }},
}, {
msg: "float on a new line",
text: "f()\n.9",
nodes: []*Node{{
Name: "function-application",
Nodes: []*Node{{
Name: "symbol",
}},
}, {
Name: "float",
}},
ignorePosition: true,
}, { }, {
msg: "function application", msg: "function application",
text: "f()", text: "f()",
@ -1331,6 +1343,38 @@ func TestMML(t *testing.T) {
To: 16, To: 16,
}}, }},
}}, }},
}, {
msg: "switch, empty cases",
text: `
switch {
case a:
case b:
default:
f()
}
`,
nodes: []*Node{{
Name: "switch",
Nodes: []*Node{{
Name: "case",
Nodes: []*Node{{
Name: "symbol",
}},
}, {
Name: "case",
Nodes: []*Node{{
Name: "symbol",
}},
}, {
Name: "default",
}, {
Name: "function-application",
Nodes: []*Node{{
Name: "symbol",
}},
}},
}},
ignorePosition: true,
}, { }, {
msg: "switch, single case", msg: "switch, single case",
text: "switch a {case b: c}", text: "switch a {case b: c}",

View File

@ -2,6 +2,7 @@ cleanup
error reporting error reporting
custom tokens custom tokens
indentation indentation
streaming
benchmarking benchmarking
code generation go code generation go
code generation js code generation js

View File

@ -11,7 +11,7 @@ type definition interface {
type parser interface { type parser interface {
nodeName() string nodeName() string
setIncludedBy(parser, []string) setIncludedBy(parser, []string)
cacheIncluded(*context, *Node) storeIncluded(*context, *Node)
parse(Trace, *context) parse(Trace, *context)
} }

View File

@ -1,5 +1,3 @@
// TODO: comment
ws:alias = [ \b\f\n\r\t\v]; ws:alias = [ \b\f\n\r\t\v];
comment:alias = ";" [^\n]*; comment:alias = ";" [^\n]*;
wsc:alias = ws | comment; wsc:alias = ws | comment;

View File

@ -99,17 +99,17 @@ func (p *sequenceParser) setIncludedBy(includedBy parser, path []string) {
p.includedBy = append(p.includedBy, includedBy) p.includedBy = append(p.includedBy, includedBy)
} }
func (p *sequenceParser) cacheIncluded(c *context, n *Node) { func (p *sequenceParser) storeIncluded(c *context, n *Node) {
if !c.excluded(n.From, p.name) { if !c.excluded(n.From, p.name) {
return return
} }
nc := newNode(p.name, n.From, n.To, p.commit) nc := newNode(p.name, n.From, n.To, p.commit)
nc.append(n) nc.append(n)
c.cache.set(nc.From, p.name, nc) c.store.set(nc.From, p.name, nc)
for _, includedBy := range p.includedBy { for _, includedBy := range p.includedBy {
includedBy.cacheIncluded(c, nc) includedBy.storeIncluded(c, nc)
} }
} }
@ -138,9 +138,9 @@ func (p *sequenceParser) parse(t Trace, c *context) {
node := newNode(p.name, c.offset, c.offset, p.commit) node := newNode(p.name, c.offset, c.offset, p.commit)
for len(items) > 0 { for len(items) > 0 {
m, ok := c.fromCache(items[0].nodeName()) m, ok := c.fromStore(items[0].nodeName())
if ok { if ok {
// t.Out1("sequence item found in cache, match:", m, items[0].nodeName(), c.offset) // t.Out1("sequence item found in store, match:", m, items[0].nodeName(), c.offset)
} else { } else {
items[0].parse(t, c) items[0].parse(t, c)
m = c.match m = c.match
@ -149,7 +149,7 @@ func (p *sequenceParser) parse(t Trace, c *context) {
if !m { if !m {
if currentCount < ranges[0][0] { if currentCount < ranges[0][0] {
// t.Out1("fail, item failed") // t.Out1("fail, item failed")
c.cache.set(node.From, p.name, nil) c.store.set(node.From, p.name, nil)
c.fail(node.From) c.fail(node.From)
return return
} }
@ -174,9 +174,9 @@ func (p *sequenceParser) parse(t Trace, c *context) {
// t.Out1("success, items parsed") // t.Out1("success, items parsed")
c.cache.set(node.From, p.name, node) c.store.set(node.From, p.name, node)
for _, includedBy := range p.includedBy { for _, includedBy := range p.includedBy {
includedBy.cacheIncluded(c, node) includedBy.storeIncluded(c, node)
} }
c.success(node) c.success(node)

View File

@ -1,22 +1,20 @@
package parse package parse
// TODO: rename to store type storedItem struct {
type cacheItem struct {
name string name string
node *Node node *Node
} }
type tokenCache struct { type storeEntry struct {
match []*cacheItem // TODO: potential optimization can be to use a balanced binary tree match []*storedItem
noMatch []string noMatch []string
} }
type cache struct { type store struct {
tokens []*tokenCache // TODO: try with pointers, too tokens []*storeEntry
} }
func (c *cache) get(offset int, name string) (*Node, bool, bool) { func (c *store) get(offset int, name string) (*Node, bool, bool) {
if len(c.tokens) <= offset { if len(c.tokens) <= offset {
return nil, false, false return nil, false, false
} }
@ -41,8 +39,8 @@ func (c *cache) get(offset int, name string) (*Node, bool, bool) {
return nil, false, false return nil, false, false
} }
func (c *cache) set(offset int, name string, n *Node) { func (c *store) set(offset int, name string, n *Node) {
var tc *tokenCache var tc *storeEntry
if len(c.tokens) > offset { if len(c.tokens) > offset {
tc = c.tokens[offset] tc = c.tokens[offset]
} else { } else {
@ -55,7 +53,7 @@ func (c *cache) set(offset int, name string, n *Node) {
} }
} }
tc = &tokenCache{} tc = &storeEntry{}
c.tokens[offset] = tc c.tokens[offset] = tc
} }
@ -86,7 +84,7 @@ func (c *cache) set(offset int, name string, n *Node) {
} }
} }
tc.match = append(tc.match, &cacheItem{ tc.match = append(tc.match, &storedItem{
name: name, name: name,
node: n, node: n,
}) })

View File

@ -36,7 +36,8 @@ var (
ErrInitFailed = errors.New("init failed") ErrInitFailed = errors.New("init failed")
ErrNoParsersDefined = errors.New("no parsers defined") ErrNoParsersDefined = errors.New("no parsers defined")
ErrInvalidInput = errors.New("invalid input") ErrInvalidInput = errors.New("invalid input")
ErrInvalidCharacter = errors.New("invalid character") // TODO: fix two use cases, utf8 and boot ErrInvalidUnicodeCharacter = errors.New("invalid unicode character")
ErrInvalidEscapeCharacter = errors.New("invalid escape character")
ErrUnexpectedCharacter = errors.New("unexpected character") ErrUnexpectedCharacter = errors.New("unexpected character")
ErrInvalidSyntax = errors.New("invalid syntax") ErrInvalidSyntax = errors.New("invalid syntax")
ErrRootAlias = errors.New("root node cannot be an alias") ErrRootAlias = errors.New("root node cannot be an alias")

View File

@ -9,25 +9,16 @@ comment = comment-segment (ws-no-nl* "\n"? ws-no-nl* comment-segme
any-char = "."; // equivalent to [^] any-char = "."; // equivalent to [^]
// TODO: document matching terminal: []
// TODO: handle char class equivalences
// TODO: enable streaming
// TODO: set route function in generated code?
// caution: newline is accepted // caution: newline is accepted
class-not = "^"; class-not = "^";
class-char = [^\\\[\]\^\-] | "\\" .; class-char = [^\\\[\]\^\-] | "\\" .;
char-range = class-char "-" class-char; char-range = class-char "-" class-char;
char-class = "[" class-not? (class-char | char-range)* "]"; char-class = "[" class-not? (class-char | char-range)* "]";
// caution: newline is accepted // newline is accepted
sequence-char = [^\\"] | "\\" .; sequence-char = [^\\"] | "\\" .;
char-sequence = "\"" sequence-char* "\""; char-sequence = "\"" sequence-char* "\"";
// TODO: this can be mixed up with sequence. Is it fine? fix this, see mml symbol
terminal:alias = any-char | char-class | char-sequence; terminal:alias = any-char | char-class | char-sequence;
symbol = [^\\ \n\t\b\f\r\v/.\[\]\"{}\^+*?|():=;]+; symbol = [^\\ \n\t\b\f\r\v/.\[\]\"{}\^+*?|():=;]+;
@ -50,11 +41,11 @@ quantity:alias = count-quantifier
| zero-or-one; | zero-or-one;
item = (terminal | symbol | group) quantity?; item = (terminal | symbol | group) quantity?;
sequence = item (wsc* item)*; // TODO: why was this '+'? sequence = item (wsc* item)*;
element:alias = terminal | symbol | group | sequence; element:alias = terminal | symbol | group | sequence;
// DOC: once cached, doesn't try again, even in a new context, therefore the order may matter // DOC: how the order matters
choice = element (wsc* "|" wsc* element)+; choice = element (wsc* "|" wsc* element)+;
// DOC: not having 'not' needs some tricks sometimes // DOC: not having 'not' needs some tricks sometimes