generic cleanup
This commit is contained in:
parent
5d38caf222
commit
1e83b66ba5
6
buzz.txt
6
buzz.txt
@ -1,5 +1,9 @@
|
||||
generator, in-process init or command line
|
||||
syntax from file or in-memory
|
||||
syntax from text or defined during runtime, or combined
|
||||
simple syntax with recursion
|
||||
no lexer required
|
||||
utf8, 8bit or custom tokens
|
||||
abstract syntax tree from text of arbitrary syntax
|
||||
reading from streams
|
||||
context free, however support for custom tokens in the input
|
||||
custom tokens for indentation built in
|
||||
|
12
char.go
12
char.go
@ -52,7 +52,7 @@ func (p *charParser) setIncludedBy(includedBy parser, path []string) {
|
||||
p.includedBy = append(p.includedBy, includedBy)
|
||||
}
|
||||
|
||||
func (p *charParser) cacheIncluded(*context, *Node) {
|
||||
func (p *charParser) storeIncluded(*context, *Node) {
|
||||
panic(cannotIncludeParsers(p.name))
|
||||
}
|
||||
|
||||
@ -82,24 +82,24 @@ func (p *charParser) parse(t Trace, c *context) {
|
||||
return
|
||||
}
|
||||
|
||||
if _, ok := c.fromCache(p.name); ok {
|
||||
// t.Out1("found in cache, match:", m)
|
||||
if _, ok := c.fromStore(p.name); ok {
|
||||
// t.Out1("found in store, match:", m)
|
||||
return
|
||||
}
|
||||
|
||||
if tok, ok := c.token(); ok && p.match(tok) {
|
||||
// t.Out1("success", string(tok))
|
||||
n := newNode(p.name, c.offset, c.offset+1, p.commit)
|
||||
c.cache.set(c.offset, p.name, n)
|
||||
c.store.set(c.offset, p.name, n)
|
||||
for _, includedBy := range p.includedBy {
|
||||
includedBy.cacheIncluded(c, n)
|
||||
includedBy.storeIncluded(c, n)
|
||||
}
|
||||
|
||||
c.success(n)
|
||||
return
|
||||
} else {
|
||||
// t.Out1("fail", string(tok))
|
||||
c.cache.set(c.offset, p.name, nil)
|
||||
c.store.set(c.offset, p.name, nil)
|
||||
c.fail(c.offset)
|
||||
return
|
||||
}
|
||||
|
16
choice.go
16
choice.go
@ -78,17 +78,17 @@ func (p *choiceParser) setIncludedBy(includedBy parser, path []string) {
|
||||
p.includedBy = append(p.includedBy, includedBy)
|
||||
}
|
||||
|
||||
func (p *choiceParser) cacheIncluded(c *context, n *Node) {
|
||||
func (p *choiceParser) storeIncluded(c *context, n *Node) {
|
||||
if !c.excluded(n.From, p.name) {
|
||||
return
|
||||
}
|
||||
|
||||
nc := newNode(p.name, n.From, n.To, p.commit)
|
||||
nc.append(n)
|
||||
c.cache.set(nc.From, p.name, nc)
|
||||
c.store.set(nc.From, p.name, nc)
|
||||
|
||||
for _, includedBy := range p.includedBy {
|
||||
includedBy.cacheIncluded(c, nc)
|
||||
includedBy.storeIncluded(c, nc)
|
||||
}
|
||||
}
|
||||
|
||||
@ -102,8 +102,8 @@ func (p *choiceParser) parse(t Trace, c *context) {
|
||||
return
|
||||
}
|
||||
|
||||
if _, ok := c.fromCache(p.name); ok {
|
||||
// t.Out1("found in cache, match:", m)
|
||||
if _, ok := c.fromStore(p.name); ok {
|
||||
// t.Out1("found in store, match:", m)
|
||||
return
|
||||
}
|
||||
|
||||
@ -137,9 +137,9 @@ func (p *choiceParser) parse(t Trace, c *context) {
|
||||
node = newNode(p.name, c.offset, c.offset, p.commit)
|
||||
node.append(c.node)
|
||||
|
||||
c.cache.set(node.From, p.name, node)
|
||||
c.store.set(node.From, p.name, node)
|
||||
for _, includedBy := range p.includedBy {
|
||||
includedBy.cacheIncluded(c, node)
|
||||
includedBy.storeIncluded(c, node)
|
||||
}
|
||||
}
|
||||
|
||||
@ -155,6 +155,6 @@ func (p *choiceParser) parse(t Trace, c *context) {
|
||||
}
|
||||
|
||||
// t.Out1("fail")
|
||||
c.cache.set(node.From, p.name, nil)
|
||||
c.store.set(node.From, p.name, nil)
|
||||
c.fail(node.From)
|
||||
}
|
||||
|
10
context.go
10
context.go
@ -11,7 +11,7 @@ type context struct {
|
||||
readOffset int
|
||||
readErr error
|
||||
eof bool
|
||||
cache *cache
|
||||
store *store
|
||||
tokens []rune
|
||||
match bool
|
||||
node *Node
|
||||
@ -21,7 +21,7 @@ type context struct {
|
||||
func newContext(r io.RuneReader) *context {
|
||||
return &context{
|
||||
reader: r,
|
||||
cache: &cache{},
|
||||
store: &store{},
|
||||
}
|
||||
}
|
||||
|
||||
@ -46,7 +46,7 @@ func (c *context) read() bool {
|
||||
c.readOffset++
|
||||
|
||||
if t == unicode.ReplacementChar {
|
||||
c.readErr = ErrInvalidCharacter
|
||||
c.readErr = ErrInvalidUnicodeCharacter
|
||||
return false
|
||||
}
|
||||
|
||||
@ -105,8 +105,8 @@ func (c *context) include(offset int, name string) {
|
||||
}
|
||||
}
|
||||
|
||||
func (c *context) fromCache(name string) (bool, bool) {
|
||||
n, m, ok := c.cache.get(c.offset, name)
|
||||
func (c *context) fromStore(name string) (bool, bool) {
|
||||
n, m, ok := c.store.get(c.offset, name)
|
||||
if !ok {
|
||||
return false, false
|
||||
}
|
||||
|
@ -48,14 +48,14 @@ func unescape(escape rune, banned []rune, chars []rune) ([]rune, error) {
|
||||
case ci == escape:
|
||||
escaped = true
|
||||
case runesContain(banned, ci):
|
||||
return nil, ErrInvalidCharacter
|
||||
return nil, ErrInvalidEscapeCharacter
|
||||
default:
|
||||
unescaped = append(unescaped, ci)
|
||||
}
|
||||
}
|
||||
|
||||
if escaped {
|
||||
return nil, ErrInvalidCharacter
|
||||
return nil, ErrInvalidEscapeCharacter
|
||||
}
|
||||
|
||||
return unescaped, nil
|
||||
|
@ -10,5 +10,3 @@ object = "{" ws* (entry (ws* "," ws* entry)*)? ws* "}";
|
||||
array = "[" ws* (value (ws* "," ws* value)*)? ws* "]";
|
||||
value:alias = true | false | null | string | number | object | array;
|
||||
json = value;
|
||||
|
||||
// TODO: value should be an alias but test it first like this
|
||||
|
@ -22,8 +22,6 @@ key-val = (comment "\n" ws*)? (key | key? ws* "=" ws* value?);
|
||||
entry:alias = group-key | key-val;
|
||||
doc:root = (entry (ws* comment-line)? | wsnlc)*;
|
||||
|
||||
// TODO: not tested
|
||||
// set as root for streaming:
|
||||
single-entry = (entry (ws* comment-line)?
|
||||
| wsnlc* entry (ws* comment-line)?)
|
||||
[];
|
||||
|
14
mml.parser
14
mml.parser
@ -101,7 +101,6 @@ channel = "<>" | "<" wsnlc* int wsnlc* ">";
|
||||
and-expression:doc = "and" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")";
|
||||
or-expression:doc = "or" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")";
|
||||
|
||||
// TODO: use collect
|
||||
argument-list:alias = static-symbol (list-sep static-symbol)*;
|
||||
collect-symbol = "..." wsnlc* static-symbol;
|
||||
function-fact:alias = "(" (wsnlc | ",")*
|
||||
@ -128,7 +127,7 @@ range-to = expression;
|
||||
range-expression:alias = range-from? wsnlc* ":" wsnlc* range-to?;
|
||||
indexer-expression:alias = expression | range-expression;
|
||||
expression-indexer:alias = primary-expression wsc* "[" wsnlc* indexer-expression wsnlc* "]";
|
||||
symbol-indexer:alias = primary-expression wsnlc* "." wsnlc* symbol-expression; // TODO: test with a float on a new line
|
||||
symbol-indexer:alias = primary-expression wsnlc* "." wsnlc* symbol-expression;
|
||||
indexer = expression-indexer | symbol-indexer;
|
||||
|
||||
function-application = primary-expression wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")";
|
||||
@ -140,11 +139,10 @@ if = "if" wsnlc* expression wsnlc* block
|
||||
default = "default" wsnlc* ":";
|
||||
default-line:alias = default (wsnlc | ";")* statement?;
|
||||
case = "case" wsnlc* expression wsnlc* ":";
|
||||
case-line:alias = case (wsnlc | ";")* statement?;
|
||||
case-line:alias = case (wsc | ";")* statement?;
|
||||
switch = "switch" wsnlc* expression? wsnlc* "{" (wsnlc | ";")*
|
||||
((case-line | default-line) (sep (case-line | default-line | statement))*)?
|
||||
(wsnlc | ";")* "}";
|
||||
// TODO: empty case not handled
|
||||
|
||||
int-type = "int";
|
||||
float-type = "float";
|
||||
@ -252,7 +250,7 @@ match-set:alias = type-set | match-fact;
|
||||
match-expression:alias = match-set | static-symbol wsc* match-set;
|
||||
|
||||
match-case = "case" wsnlc* match-expression wsnlc* ":";
|
||||
match-case-line:alias = match-case (wsnlc | ";")* statement?;
|
||||
match-case-line:alias = match-case (wsc | ";")* statement?;
|
||||
match = "match" wsnlc* expression wsnlc* "{" (wsnlc | ";")*
|
||||
((match-case-line | default-line)
|
||||
(sep (match-case-line | default-line | statement))*)?
|
||||
@ -286,7 +284,7 @@ communication-group:alias = "(" wsnlc* communication wsnlc* ")";
|
||||
communication:alias = receive-expression | receive-statement | send | communication-group;
|
||||
|
||||
select-case = "case" wsnlc* communication wsnlc* ":";
|
||||
select-case-line:alias = select-case (wsnlc | ";")* statement?;
|
||||
select-case-line:alias = select-case (wsc | ";")* statement?;
|
||||
select = "select" wsnlc* "{" (wsnlc | ";")*
|
||||
((select-case-line | default-line)
|
||||
(sep (select-case-line | default-line | statement))*)?
|
||||
@ -497,10 +495,6 @@ definition:alias = value-definition
|
||||
| function-definition-group
|
||||
| effect-definition-group;
|
||||
|
||||
// TODO: cannot do:
|
||||
// type alias a int|fn () string|error
|
||||
// needs grouping of type-set
|
||||
|
||||
type-alias = "type" wsnlc* "alias" wsnlc* static-symbol wsnlc* type-set;
|
||||
type-constraint = "type" wsnlc* static-symbol wsnlc* type-set;
|
||||
|
||||
|
44
mml_test.go
44
mml_test.go
@ -1028,6 +1028,18 @@ func TestMML(t *testing.T) {
|
||||
To: 7,
|
||||
}},
|
||||
}},
|
||||
}, {
|
||||
msg: "float on a new line",
|
||||
text: "f()\n.9",
|
||||
nodes: []*Node{{
|
||||
Name: "function-application",
|
||||
Nodes: []*Node{{
|
||||
Name: "symbol",
|
||||
}},
|
||||
}, {
|
||||
Name: "float",
|
||||
}},
|
||||
ignorePosition: true,
|
||||
}, {
|
||||
msg: "function application",
|
||||
text: "f()",
|
||||
@ -1331,6 +1343,38 @@ func TestMML(t *testing.T) {
|
||||
To: 16,
|
||||
}},
|
||||
}},
|
||||
}, {
|
||||
msg: "switch, empty cases",
|
||||
text: `
|
||||
switch {
|
||||
case a:
|
||||
case b:
|
||||
default:
|
||||
f()
|
||||
}
|
||||
`,
|
||||
nodes: []*Node{{
|
||||
Name: "switch",
|
||||
Nodes: []*Node{{
|
||||
Name: "case",
|
||||
Nodes: []*Node{{
|
||||
Name: "symbol",
|
||||
}},
|
||||
}, {
|
||||
Name: "case",
|
||||
Nodes: []*Node{{
|
||||
Name: "symbol",
|
||||
}},
|
||||
}, {
|
||||
Name: "default",
|
||||
}, {
|
||||
Name: "function-application",
|
||||
Nodes: []*Node{{
|
||||
Name: "symbol",
|
||||
}},
|
||||
}},
|
||||
}},
|
||||
ignorePosition: true,
|
||||
}, {
|
||||
msg: "switch, single case",
|
||||
text: "switch a {case b: c}",
|
||||
|
@ -2,6 +2,7 @@ cleanup
|
||||
error reporting
|
||||
custom tokens
|
||||
indentation
|
||||
streaming
|
||||
benchmarking
|
||||
code generation go
|
||||
code generation js
|
||||
|
2
parse.go
2
parse.go
@ -11,7 +11,7 @@ type definition interface {
|
||||
type parser interface {
|
||||
nodeName() string
|
||||
setIncludedBy(parser, []string)
|
||||
cacheIncluded(*context, *Node)
|
||||
storeIncluded(*context, *Node)
|
||||
parse(Trace, *context)
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,3 @@
|
||||
// TODO: comment
|
||||
|
||||
ws:alias = [ \b\f\n\r\t\v];
|
||||
comment:alias = ";" [^\n]*;
|
||||
wsc:alias = ws | comment;
|
||||
|
16
sequence.go
16
sequence.go
@ -99,17 +99,17 @@ func (p *sequenceParser) setIncludedBy(includedBy parser, path []string) {
|
||||
p.includedBy = append(p.includedBy, includedBy)
|
||||
}
|
||||
|
||||
func (p *sequenceParser) cacheIncluded(c *context, n *Node) {
|
||||
func (p *sequenceParser) storeIncluded(c *context, n *Node) {
|
||||
if !c.excluded(n.From, p.name) {
|
||||
return
|
||||
}
|
||||
|
||||
nc := newNode(p.name, n.From, n.To, p.commit)
|
||||
nc.append(n)
|
||||
c.cache.set(nc.From, p.name, nc)
|
||||
c.store.set(nc.From, p.name, nc)
|
||||
|
||||
for _, includedBy := range p.includedBy {
|
||||
includedBy.cacheIncluded(c, nc)
|
||||
includedBy.storeIncluded(c, nc)
|
||||
}
|
||||
}
|
||||
|
||||
@ -138,9 +138,9 @@ func (p *sequenceParser) parse(t Trace, c *context) {
|
||||
node := newNode(p.name, c.offset, c.offset, p.commit)
|
||||
|
||||
for len(items) > 0 {
|
||||
m, ok := c.fromCache(items[0].nodeName())
|
||||
m, ok := c.fromStore(items[0].nodeName())
|
||||
if ok {
|
||||
// t.Out1("sequence item found in cache, match:", m, items[0].nodeName(), c.offset)
|
||||
// t.Out1("sequence item found in store, match:", m, items[0].nodeName(), c.offset)
|
||||
} else {
|
||||
items[0].parse(t, c)
|
||||
m = c.match
|
||||
@ -149,7 +149,7 @@ func (p *sequenceParser) parse(t Trace, c *context) {
|
||||
if !m {
|
||||
if currentCount < ranges[0][0] {
|
||||
// t.Out1("fail, item failed")
|
||||
c.cache.set(node.From, p.name, nil)
|
||||
c.store.set(node.From, p.name, nil)
|
||||
c.fail(node.From)
|
||||
return
|
||||
}
|
||||
@ -174,9 +174,9 @@ func (p *sequenceParser) parse(t Trace, c *context) {
|
||||
|
||||
// t.Out1("success, items parsed")
|
||||
|
||||
c.cache.set(node.From, p.name, node)
|
||||
c.store.set(node.From, p.name, node)
|
||||
for _, includedBy := range p.includedBy {
|
||||
includedBy.cacheIncluded(c, node)
|
||||
includedBy.storeIncluded(c, node)
|
||||
}
|
||||
|
||||
c.success(node)
|
||||
|
@ -1,22 +1,20 @@
|
||||
package parse
|
||||
|
||||
// TODO: rename to store
|
||||
|
||||
type cacheItem struct {
|
||||
type storedItem struct {
|
||||
name string
|
||||
node *Node
|
||||
}
|
||||
|
||||
type tokenCache struct {
|
||||
match []*cacheItem // TODO: potential optimization can be to use a balanced binary tree
|
||||
type storeEntry struct {
|
||||
match []*storedItem
|
||||
noMatch []string
|
||||
}
|
||||
|
||||
type cache struct {
|
||||
tokens []*tokenCache // TODO: try with pointers, too
|
||||
type store struct {
|
||||
tokens []*storeEntry
|
||||
}
|
||||
|
||||
func (c *cache) get(offset int, name string) (*Node, bool, bool) {
|
||||
func (c *store) get(offset int, name string) (*Node, bool, bool) {
|
||||
if len(c.tokens) <= offset {
|
||||
return nil, false, false
|
||||
}
|
||||
@ -41,8 +39,8 @@ func (c *cache) get(offset int, name string) (*Node, bool, bool) {
|
||||
return nil, false, false
|
||||
}
|
||||
|
||||
func (c *cache) set(offset int, name string, n *Node) {
|
||||
var tc *tokenCache
|
||||
func (c *store) set(offset int, name string, n *Node) {
|
||||
var tc *storeEntry
|
||||
if len(c.tokens) > offset {
|
||||
tc = c.tokens[offset]
|
||||
} else {
|
||||
@ -55,7 +53,7 @@ func (c *cache) set(offset int, name string, n *Node) {
|
||||
}
|
||||
}
|
||||
|
||||
tc = &tokenCache{}
|
||||
tc = &storeEntry{}
|
||||
c.tokens[offset] = tc
|
||||
}
|
||||
|
||||
@ -86,7 +84,7 @@ func (c *cache) set(offset int, name string, n *Node) {
|
||||
}
|
||||
}
|
||||
|
||||
tc.match = append(tc.match, &cacheItem{
|
||||
tc.match = append(tc.match, &storedItem{
|
||||
name: name,
|
||||
node: n,
|
||||
})
|
@ -36,7 +36,8 @@ var (
|
||||
ErrInitFailed = errors.New("init failed")
|
||||
ErrNoParsersDefined = errors.New("no parsers defined")
|
||||
ErrInvalidInput = errors.New("invalid input")
|
||||
ErrInvalidCharacter = errors.New("invalid character") // TODO: fix two use cases, utf8 and boot
|
||||
ErrInvalidUnicodeCharacter = errors.New("invalid unicode character")
|
||||
ErrInvalidEscapeCharacter = errors.New("invalid escape character")
|
||||
ErrUnexpectedCharacter = errors.New("unexpected character")
|
||||
ErrInvalidSyntax = errors.New("invalid syntax")
|
||||
ErrRootAlias = errors.New("root node cannot be an alias")
|
||||
|
@ -9,25 +9,16 @@ comment = comment-segment (ws-no-nl* "\n"? ws-no-nl* comment-segme
|
||||
|
||||
any-char = "."; // equivalent to [^]
|
||||
|
||||
// TODO: document matching terminal: []
|
||||
|
||||
// TODO: handle char class equivalences
|
||||
|
||||
// TODO: enable streaming
|
||||
|
||||
// TODO: set route function in generated code?
|
||||
|
||||
// caution: newline is accepted
|
||||
class-not = "^";
|
||||
class-char = [^\\\[\]\^\-] | "\\" .;
|
||||
char-range = class-char "-" class-char;
|
||||
char-class = "[" class-not? (class-char | char-range)* "]";
|
||||
|
||||
// caution: newline is accepted
|
||||
// newline is accepted
|
||||
sequence-char = [^\\"] | "\\" .;
|
||||
char-sequence = "\"" sequence-char* "\"";
|
||||
|
||||
// TODO: this can be mixed up with sequence. Is it fine? fix this, see mml symbol
|
||||
terminal:alias = any-char | char-class | char-sequence;
|
||||
|
||||
symbol = [^\\ \n\t\b\f\r\v/.\[\]\"{}\^+*?|():=;]+;
|
||||
@ -50,11 +41,11 @@ quantity:alias = count-quantifier
|
||||
| zero-or-one;
|
||||
|
||||
item = (terminal | symbol | group) quantity?;
|
||||
sequence = item (wsc* item)*; // TODO: why was this '+'?
|
||||
sequence = item (wsc* item)*;
|
||||
|
||||
element:alias = terminal | symbol | group | sequence;
|
||||
|
||||
// DOC: once cached, doesn't try again, even in a new context, therefore the order may matter
|
||||
// DOC: how the order matters
|
||||
choice = element (wsc* "|" wsc* element)+;
|
||||
|
||||
// DOC: not having 'not' needs some tricks sometimes
|
||||
|
Loading…
Reference in New Issue
Block a user