generic cleanup

This commit is contained in:
Arpad Ryszka 2017-06-26 02:20:23 +02:00
parent 5d38caf222
commit 1e83b66ba5
16 changed files with 99 additions and 72 deletions

View File

@ -1,5 +1,9 @@
generator, in-process init or command line
syntax from file or in-memory
syntax from text, defined at runtime, or a combination of both
simple syntax with recursion
no lexer required
utf8, 8bit or custom tokens
abstract syntax tree from text of arbitrary syntax
reading from streams
context free, but with support for custom tokens in the input
custom tokens for indentation built in

12
char.go
View File

@ -52,7 +52,7 @@ func (p *charParser) setIncludedBy(includedBy parser, path []string) {
p.includedBy = append(p.includedBy, includedBy)
}
func (p *charParser) cacheIncluded(*context, *Node) {
func (p *charParser) storeIncluded(*context, *Node) {
panic(cannotIncludeParsers(p.name))
}
@ -82,24 +82,24 @@ func (p *charParser) parse(t Trace, c *context) {
return
}
if _, ok := c.fromCache(p.name); ok {
// t.Out1("found in cache, match:", m)
if _, ok := c.fromStore(p.name); ok {
// t.Out1("found in store, match:", m)
return
}
if tok, ok := c.token(); ok && p.match(tok) {
// t.Out1("success", string(tok))
n := newNode(p.name, c.offset, c.offset+1, p.commit)
c.cache.set(c.offset, p.name, n)
c.store.set(c.offset, p.name, n)
for _, includedBy := range p.includedBy {
includedBy.cacheIncluded(c, n)
includedBy.storeIncluded(c, n)
}
c.success(n)
return
} else {
// t.Out1("fail", string(tok))
c.cache.set(c.offset, p.name, nil)
c.store.set(c.offset, p.name, nil)
c.fail(c.offset)
return
}

View File

@ -78,17 +78,17 @@ func (p *choiceParser) setIncludedBy(includedBy parser, path []string) {
p.includedBy = append(p.includedBy, includedBy)
}
func (p *choiceParser) cacheIncluded(c *context, n *Node) {
func (p *choiceParser) storeIncluded(c *context, n *Node) {
if !c.excluded(n.From, p.name) {
return
}
nc := newNode(p.name, n.From, n.To, p.commit)
nc.append(n)
c.cache.set(nc.From, p.name, nc)
c.store.set(nc.From, p.name, nc)
for _, includedBy := range p.includedBy {
includedBy.cacheIncluded(c, nc)
includedBy.storeIncluded(c, nc)
}
}
@ -102,8 +102,8 @@ func (p *choiceParser) parse(t Trace, c *context) {
return
}
if _, ok := c.fromCache(p.name); ok {
// t.Out1("found in cache, match:", m)
if _, ok := c.fromStore(p.name); ok {
// t.Out1("found in store, match:", m)
return
}
@ -137,9 +137,9 @@ func (p *choiceParser) parse(t Trace, c *context) {
node = newNode(p.name, c.offset, c.offset, p.commit)
node.append(c.node)
c.cache.set(node.From, p.name, node)
c.store.set(node.From, p.name, node)
for _, includedBy := range p.includedBy {
includedBy.cacheIncluded(c, node)
includedBy.storeIncluded(c, node)
}
}
@ -155,6 +155,6 @@ func (p *choiceParser) parse(t Trace, c *context) {
}
// t.Out1("fail")
c.cache.set(node.From, p.name, nil)
c.store.set(node.From, p.name, nil)
c.fail(node.From)
}

View File

@ -11,7 +11,7 @@ type context struct {
readOffset int
readErr error
eof bool
cache *cache
store *store
tokens []rune
match bool
node *Node
@ -21,7 +21,7 @@ type context struct {
func newContext(r io.RuneReader) *context {
return &context{
reader: r,
cache: &cache{},
store: &store{},
}
}
@ -46,7 +46,7 @@ func (c *context) read() bool {
c.readOffset++
if t == unicode.ReplacementChar {
c.readErr = ErrInvalidCharacter
c.readErr = ErrInvalidUnicodeCharacter
return false
}
@ -105,8 +105,8 @@ func (c *context) include(offset int, name string) {
}
}
func (c *context) fromCache(name string) (bool, bool) {
n, m, ok := c.cache.get(c.offset, name)
func (c *context) fromStore(name string) (bool, bool) {
n, m, ok := c.store.get(c.offset, name)
if !ok {
return false, false
}

View File

@ -48,14 +48,14 @@ func unescape(escape rune, banned []rune, chars []rune) ([]rune, error) {
case ci == escape:
escaped = true
case runesContain(banned, ci):
return nil, ErrInvalidCharacter
return nil, ErrInvalidEscapeCharacter
default:
unescaped = append(unescaped, ci)
}
}
if escaped {
return nil, ErrInvalidCharacter
return nil, ErrInvalidEscapeCharacter
}
return unescaped, nil

View File

@ -10,5 +10,3 @@ object = "{" ws* (entry (ws* "," ws* entry)*)? ws* "}";
array = "[" ws* (value (ws* "," ws* value)*)? ws* "]";
value:alias = true | false | null | string | number | object | array;
json = value;
// TODO: value should be an alias but test it first like this

View File

@ -22,8 +22,6 @@ key-val = (comment "\n" ws*)? (key | key? ws* "=" ws* value?);
entry:alias = group-key | key-val;
doc:root = (entry (ws* comment-line)? | wsnlc)*;
// TODO: not tested
// set as root for streaming:
single-entry = (entry (ws* comment-line)?
| wsnlc* entry (ws* comment-line)?)
[];

View File

@ -101,7 +101,6 @@ channel = "<>" | "<" wsnlc* int wsnlc* ">";
and-expression:doc = "and" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")";
or-expression:doc = "or" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")";
// TODO: use collect
argument-list:alias = static-symbol (list-sep static-symbol)*;
collect-symbol = "..." wsnlc* static-symbol;
function-fact:alias = "(" (wsnlc | ",")*
@ -128,7 +127,7 @@ range-to = expression;
range-expression:alias = range-from? wsnlc* ":" wsnlc* range-to?;
indexer-expression:alias = expression | range-expression;
expression-indexer:alias = primary-expression wsc* "[" wsnlc* indexer-expression wsnlc* "]";
symbol-indexer:alias = primary-expression wsnlc* "." wsnlc* symbol-expression; // TODO: test with a float on a new line
symbol-indexer:alias = primary-expression wsnlc* "." wsnlc* symbol-expression;
indexer = expression-indexer | symbol-indexer;
function-application = primary-expression wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")";
@ -140,11 +139,10 @@ if = "if" wsnlc* expression wsnlc* block
default = "default" wsnlc* ":";
default-line:alias = default (wsnlc | ";")* statement?;
case = "case" wsnlc* expression wsnlc* ":";
case-line:alias = case (wsnlc | ";")* statement?;
case-line:alias = case (wsc | ";")* statement?;
switch = "switch" wsnlc* expression? wsnlc* "{" (wsnlc | ";")*
((case-line | default-line) (sep (case-line | default-line | statement))*)?
(wsnlc | ";")* "}";
// TODO: empty case not handled
int-type = "int";
float-type = "float";
@ -252,7 +250,7 @@ match-set:alias = type-set | match-fact;
match-expression:alias = match-set | static-symbol wsc* match-set;
match-case = "case" wsnlc* match-expression wsnlc* ":";
match-case-line:alias = match-case (wsnlc | ";")* statement?;
match-case-line:alias = match-case (wsc | ";")* statement?;
match = "match" wsnlc* expression wsnlc* "{" (wsnlc | ";")*
((match-case-line | default-line)
(sep (match-case-line | default-line | statement))*)?
@ -286,7 +284,7 @@ communication-group:alias = "(" wsnlc* communication wsnlc* ")";
communication:alias = receive-expression | receive-statement | send | communication-group;
select-case = "case" wsnlc* communication wsnlc* ":";
select-case-line:alias = select-case (wsnlc | ";")* statement?;
select-case-line:alias = select-case (wsc | ";")* statement?;
select = "select" wsnlc* "{" (wsnlc | ";")*
((select-case-line | default-line)
(sep (select-case-line | default-line | statement))*)?
@ -497,10 +495,6 @@ definition:alias = value-definition
| function-definition-group
| effect-definition-group;
// TODO: cannot do:
// type alias a int|fn () string|error
// needs grouping of type-set
type-alias = "type" wsnlc* "alias" wsnlc* static-symbol wsnlc* type-set;
type-constraint = "type" wsnlc* static-symbol wsnlc* type-set;

View File

@ -1028,6 +1028,18 @@ func TestMML(t *testing.T) {
To: 7,
}},
}},
}, {
msg: "float on a new line",
text: "f()\n.9",
nodes: []*Node{{
Name: "function-application",
Nodes: []*Node{{
Name: "symbol",
}},
}, {
Name: "float",
}},
ignorePosition: true,
}, {
msg: "function application",
text: "f()",
@ -1331,6 +1343,38 @@ func TestMML(t *testing.T) {
To: 16,
}},
}},
}, {
msg: "switch, empty cases",
text: `
switch {
case a:
case b:
default:
f()
}
`,
nodes: []*Node{{
Name: "switch",
Nodes: []*Node{{
Name: "case",
Nodes: []*Node{{
Name: "symbol",
}},
}, {
Name: "case",
Nodes: []*Node{{
Name: "symbol",
}},
}, {
Name: "default",
}, {
Name: "function-application",
Nodes: []*Node{{
Name: "symbol",
}},
}},
}},
ignorePosition: true,
}, {
msg: "switch, single case",
text: "switch a {case b: c}",

View File

@ -2,6 +2,7 @@ cleanup
error reporting
custom tokens
indentation
streaming
benchmarking
code generation go
code generation js

View File

@ -11,7 +11,7 @@ type definition interface {
type parser interface {
nodeName() string
setIncludedBy(parser, []string)
cacheIncluded(*context, *Node)
storeIncluded(*context, *Node)
parse(Trace, *context)
}

View File

@ -1,5 +1,3 @@
// TODO: comment
ws:alias = [ \b\f\n\r\t\v];
comment:alias = ";" [^\n]*;
wsc:alias = ws | comment;

View File

@ -99,17 +99,17 @@ func (p *sequenceParser) setIncludedBy(includedBy parser, path []string) {
p.includedBy = append(p.includedBy, includedBy)
}
func (p *sequenceParser) cacheIncluded(c *context, n *Node) {
func (p *sequenceParser) storeIncluded(c *context, n *Node) {
if !c.excluded(n.From, p.name) {
return
}
nc := newNode(p.name, n.From, n.To, p.commit)
nc.append(n)
c.cache.set(nc.From, p.name, nc)
c.store.set(nc.From, p.name, nc)
for _, includedBy := range p.includedBy {
includedBy.cacheIncluded(c, nc)
includedBy.storeIncluded(c, nc)
}
}
@ -138,9 +138,9 @@ func (p *sequenceParser) parse(t Trace, c *context) {
node := newNode(p.name, c.offset, c.offset, p.commit)
for len(items) > 0 {
m, ok := c.fromCache(items[0].nodeName())
m, ok := c.fromStore(items[0].nodeName())
if ok {
// t.Out1("sequence item found in cache, match:", m, items[0].nodeName(), c.offset)
// t.Out1("sequence item found in store, match:", m, items[0].nodeName(), c.offset)
} else {
items[0].parse(t, c)
m = c.match
@ -149,7 +149,7 @@ func (p *sequenceParser) parse(t Trace, c *context) {
if !m {
if currentCount < ranges[0][0] {
// t.Out1("fail, item failed")
c.cache.set(node.From, p.name, nil)
c.store.set(node.From, p.name, nil)
c.fail(node.From)
return
}
@ -174,9 +174,9 @@ func (p *sequenceParser) parse(t Trace, c *context) {
// t.Out1("success, items parsed")
c.cache.set(node.From, p.name, node)
c.store.set(node.From, p.name, node)
for _, includedBy := range p.includedBy {
includedBy.cacheIncluded(c, node)
includedBy.storeIncluded(c, node)
}
c.success(node)

View File

@ -1,22 +1,20 @@
package parse
// TODO: rename to store
type cacheItem struct {
type storedItem struct {
name string
node *Node
}
type tokenCache struct {
match []*cacheItem // TODO: potential optimization can be to use a balanced binary tree
type storeEntry struct {
match []*storedItem
noMatch []string
}
type cache struct {
tokens []*tokenCache // TODO: try with pointers, too
type store struct {
tokens []*storeEntry
}
func (c *cache) get(offset int, name string) (*Node, bool, bool) {
func (c *store) get(offset int, name string) (*Node, bool, bool) {
if len(c.tokens) <= offset {
return nil, false, false
}
@ -41,8 +39,8 @@ func (c *cache) get(offset int, name string) (*Node, bool, bool) {
return nil, false, false
}
func (c *cache) set(offset int, name string, n *Node) {
var tc *tokenCache
func (c *store) set(offset int, name string, n *Node) {
var tc *storeEntry
if len(c.tokens) > offset {
tc = c.tokens[offset]
} else {
@ -55,7 +53,7 @@ func (c *cache) set(offset int, name string, n *Node) {
}
}
tc = &tokenCache{}
tc = &storeEntry{}
c.tokens[offset] = tc
}
@ -86,7 +84,7 @@ func (c *cache) set(offset int, name string, n *Node) {
}
}
tc.match = append(tc.match, &cacheItem{
tc.match = append(tc.match, &storedItem{
name: name,
node: n,
})

View File

@ -36,7 +36,8 @@ var (
ErrInitFailed = errors.New("init failed")
ErrNoParsersDefined = errors.New("no parsers defined")
ErrInvalidInput = errors.New("invalid input")
ErrInvalidCharacter = errors.New("invalid character") // TODO: fix two use cases, utf8 and boot
ErrInvalidUnicodeCharacter = errors.New("invalid unicode character")
ErrInvalidEscapeCharacter = errors.New("invalid escape character")
ErrUnexpectedCharacter = errors.New("unexpected character")
ErrInvalidSyntax = errors.New("invalid syntax")
ErrRootAlias = errors.New("root node cannot be an alias")

View File

@ -9,25 +9,16 @@ comment = comment-segment (ws-no-nl* "\n"? ws-no-nl* comment-segme
any-char = "."; // equivalent to [^]
// TODO: document matching terminal: []
// TODO: handle char class equivalences
// TODO: enable streaming
// TODO: set route function in generated code?
// caution: newline is accepted
class-not = "^";
class-char = [^\\\[\]\^\-] | "\\" .;
char-range = class-char "-" class-char;
char-class = "[" class-not? (class-char | char-range)* "]";
// caution: newline is accepted
// newline is accepted
sequence-char = [^\\"] | "\\" .;
char-sequence = "\"" sequence-char* "\"";
// TODO: this can be mixed up with sequence. Is it fine? fix this, see mml symbol
terminal:alias = any-char | char-class | char-sequence;
symbol = [^\\ \n\t\b\f\r\v/.\[\]\"{}\^+*?|():=;]+;
@ -50,11 +41,11 @@ quantity:alias = count-quantifier
| zero-or-one;
item = (terminal | symbol | group) quantity?;
sequence = item (wsc* item)*; // TODO: why was this '+'?
sequence = item (wsc* item)*;
element:alias = terminal | symbol | group | sequence;
// DOC: once cached, doesn't try again, even in a new context, therefore the order may matter
// DOC: how the order matters
choice = element (wsc* "|" wsc* element)+;
// DOC: not having 'not' needs some tricks sometimes