import code to its own repo

Arpad Ryszka 2017-06-25 17:51:08 +02:00
parent 00332200ca
commit b86872c58e
30 changed files with 8291 additions and 0 deletions

Makefile (new normal file, 17 lines)

@@ -0,0 +1,17 @@
SOURCES = $(shell find . -name '*.go')

default: build

imports:
	@goimports -w $(SOURCES)

build: $(SOURCES)
	go build ./...

check: build
	go test ./... -test.short -run ^Test

fmt: $(SOURCES)
	@gofmt -w -s $(SOURCES)

precommit: build check fmt

boot.go (new normal file, 211 lines)

@@ -0,0 +1,211 @@
package parse
import (
"errors"
"os"
"strconv"
)
var errInvalidDefinition = errors.New("invalid syntax definition")
func stringToCommitType(s string) CommitType {
switch s {
case "alias":
return Alias
case "doc":
return Documentation
case "root":
return Root
default:
return None
}
}
func checkBootDefinitionLength(d []string) error {
if len(d) < 3 {
return errInvalidDefinition
}
switch d[0] {
case "chars", "class":
if len(d) < 4 {
return errInvalidDefinition
}
case "quantifier":
if len(d) != 6 {
return errInvalidDefinition
}
case "sequence", "choice":
if len(d) < 4 {
return errInvalidDefinition
}
}
return nil
}
func parseClass(c []rune) (not bool, chars []rune, ranges [][]rune, err error) {
if c[0] == '^' {
not = true
c = c[1:]
}
for {
if len(c) == 0 {
return
}
var c0 rune
c0, c = c[0], c[1:]
switch c0 {
case '[', ']', '^', '-':
err = errInvalidDefinition
return
}
if c0 == '\\' {
if len(c) == 0 {
err = errInvalidDefinition
return
}
c0, c = unescapeChar(c[0]), c[1:]
}
if len(c) < 2 || c[0] != '-' {
chars = append(chars, c0)
continue
}
var c1 rune
c1, c = c[1], c[2:]
if c1 == '\\' {
if len(c) == 0 {
err = errInvalidDefinition
return
}
c1, c = unescapeChar(c[0]), c[1:]
}
ranges = append(ranges, []rune{c0, c1})
}
}
func defineBootAnything(s *Syntax, d []string) error {
ct := stringToCommitType(d[2])
return s.AnyChar(d[1], ct)
}
func defineBootClass(s *Syntax, d []string) error {
ct := stringToCommitType(d[2])
not, chars, ranges, err := parseClass([]rune(d[3]))
if err != nil {
return err
}
return s.Class(d[1], ct, not, chars, ranges)
}
func defineBootCharSequence(s *Syntax, d []string) error {
ct := stringToCommitType(d[2])
chars, err := unescape('\\', []rune{'"', '\\'}, []rune(d[3]))
if err != nil {
return err
}
return s.CharSequence(d[1], ct, chars)
}
func defineBootQuantifier(s *Syntax, d []string) error {
ct := stringToCommitType(d[2])
var (
min, max int
err error
)
if min, err = strconv.Atoi(d[4]); err != nil {
return err
}
if max, err = strconv.Atoi(d[5]); err != nil {
return err
}
return s.Quantifier(d[1], ct, d[3], min, max)
}
func defineBootSequence(s *Syntax, d []string) error {
ct := stringToCommitType(d[2])
return s.Sequence(d[1], ct, d[3:]...)
}
func defineBootChoice(s *Syntax, d []string) error {
ct := stringToCommitType(d[2])
return s.Choice(d[1], ct, d[3:]...)
}
func defineBoot(s *Syntax, d []string) error {
switch d[0] {
case "anything":
return defineBootAnything(s, d)
case "class":
return defineBootClass(s, d)
case "chars":
return defineBootCharSequence(s, d)
case "quantifier":
return defineBootQuantifier(s, d)
case "sequence":
return defineBootSequence(s, d)
case "choice":
return defineBootChoice(s, d)
default:
return errInvalidDefinition
}
}
func defineAllBoot(s *Syntax, defs [][]string) error {
for _, d := range defs {
if err := defineBoot(s, d); err != nil {
return err
}
}
return nil
}
func initBoot(t Trace, definitions [][]string) (*Syntax, error) {
s := NewSyntax(t)
if err := defineAllBoot(s, definitions); err != nil {
return nil, err
}
return s, s.Init()
}
func bootSyntax(t Trace) (*Syntax, error) {
b, err := initBoot(t, bootDefinitions)
if err != nil {
return nil, err
}
f, err := os.Open("syntax.p")
if err != nil {
return nil, err
}
defer f.Close()
doc, err := b.Parse(f)
if err != nil {
return nil, err
}
s := NewSyntax(t)
return s, define(s, doc)
}
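// A minimal usage sketch, mirroring boot_test.go: the bootstrap works in two
// stages. initBoot builds a parser from the hard-coded bootDefinitions, that
// parser reads syntax.p (the grammar of the definition language itself), and
// define turns the resulting parse tree into the final Syntax. A caller might
// then parse a definition source like this:
//
//	var trace Trace // zero value, as in the tests
//	s, err := bootSyntax(trace)
//	if err != nil {
//		// handle the error
//	}
//	doc, err := s.Parse(strings.NewReader(`json = value;`))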

boot_test.go (new normal file, 73 lines)

@@ -0,0 +1,73 @@
package parse
import (
"os"
"testing"
)
func TestBoot(t *testing.T) {
var trace Trace
// trace = NewTrace(2)
b, err := initBoot(trace, bootDefinitions)
if err != nil {
t.Error(err)
return
}
f, err := os.Open("syntax.p")
if err != nil {
t.Error(err)
return
}
defer f.Close()
n0, err := b.Parse(f)
if err != nil {
t.Error(err)
return
}
s0 := NewSyntax(trace)
if err := define(s0, n0); err != nil {
t.Error(err)
return
}
_, err = f.Seek(0, 0)
if err != nil {
t.Error(err)
return
}
n1, err := s0.Parse(f)
if err != nil {
t.Error(err)
return
}
checkNode(t, n1, n0)
if t.Failed() {
return
}
s1 := NewSyntax(trace)
if err := define(s1, n1); err != nil {
t.Error(err)
return
}
_, err = f.Seek(0, 0)
if err != nil {
t.Error(err)
return
}
n2, err := s1.Parse(f)
if err != nil {
t.Error(err)
return
}
checkNode(t, n2, n1)
}

bootsyntax.go (new normal file, 285 lines)

@@ -0,0 +1,285 @@
package parse
var bootDefinitions = [][]string{{
"chars", "space", "alias", " ",
}, {
"chars", "tab", "alias", "\\t",
}, {
"chars", "nl", "alias", "\\n",
}, {
"chars", "backspace", "alias", "\\b",
}, {
"chars", "formfeed", "alias", "\\f",
}, {
"chars", "carryreturn", "alias", "\\r",
}, {
"chars", "verticaltab", "alias", "\\v",
}, {
"choice",
"ws",
"alias",
"space",
"tab",
"nl",
"backspace",
"formfeed",
"carryreturn",
"verticaltab",
}, {
"chars", "open-block-comment", "alias", "/*",
}, {
"chars", "close-block-comment", "alias", "*/",
}, {
"chars", "star", "alias", "*",
}, {
"class", "not-slash", "alias", "^/",
}, {
"class", "not-star", "alias", "^*",
}, {
"chars", "double-slash", "alias", "//",
}, {
"class", "not-nl", "alias", "^\\n",
}, {
"sequence", "not-block-close", "alias", "star", "not-slash",
}, {
"choice", "block-comment-char", "alias", "not-block-close", "not-star",
}, {
"quantifier", "block-comment-body", "alias", "block-comment-char", "0", "-1",
}, {
"sequence",
"block-comment",
"alias",
"open-block-comment",
"block-comment-body",
"close-block-comment",
}, {
"quantifier", "not-nls", "alias", "not-nl", "0", "-1",
}, {
"sequence", "line-comment", "alias", "double-slash", "not-nls",
}, {
"choice", "comment-segment", "alias", "block-comment", "line-comment",
}, {
"quantifier", "wss", "alias", "ws", "0", "-1",
}, {
"quantifier", "optional-nl", "alias", "nl", "0", "1",
}, {
"choice",
"ws-no-nl",
"alias",
"space",
"tab",
"backspace",
"formfeed",
"carryreturn",
"verticaltab",
}, {
"sequence",
"continue-comment-segment",
"alias",
"ws-no-nl",
"optional-nl",
"ws-no-nl",
"comment-segment",
}, {
"quantifier", "continue-comment", "alias", "continue-comment-segment", "0", "-1",
}, {
"sequence",
"comment",
"none",
"comment-segment",
"continue-comment",
}, {
"choice", "wsc", "alias", "ws", "comment",
}, {
"quantifier", "wscs", "alias", "wsc", "0", "-1",
}, {
"anything", "anything", "alias",
}, {
"chars", "any-char", "none", ".",
}, {
"chars", "open-square", "alias", "[",
}, {
"chars", "close-square", "alias", "]",
}, {
"chars", "class-not", "none", "^",
}, {
"chars", "dash", "alias", "-",
}, {
"quantifier", "optional-class-not", "alias", "class-not", "0", "1",
}, {
"class", "not-class-control", "alias", "^\\\\\\[\\]\\^\\-",
}, {
"chars", "escape", "alias", "\\\\",
}, {
"sequence", "escaped-char", "alias", "escape", "anything",
}, {
"choice", "class-char", "none", "not-class-control", "escaped-char",
}, {
"sequence", "char-range", "none", "class-char", "dash", "class-char",
}, {
"choice", "char-or-range", "alias", "class-char", "char-range",
}, {
"quantifier", "chars-or-ranges", "alias", "char-or-range", "0", "-1",
}, {
"sequence", "char-class", "none", "open-square", "optional-class-not", "chars-or-ranges", "close-square",
}, {
"chars", "double-quote", "alias", "\\\"",
}, {
"class", "not-char-sequence-control", "alias", "^\\\\\"",
}, {
"choice", "sequence-char", "none", "not-char-sequence-control", "escaped-char",
}, {
"quantifier", "char-sequence-chars", "alias", "sequence-char", "0", "-1",
}, {
"sequence", "char-sequence", "none", "double-quote", "char-sequence-chars", "double-quote",
}, {
"choice", "terminal", "alias", "any-char", "char-class", "char-sequence",
}, {
"class", "symbol-char", "alias", "^\\\\ \\n\\t\\b\\f\\r\\v\\b/.\\[\\]\\\"{}\\^+*?|():=;",
}, {
"quantifier", "symbol-chars", "alias", "symbol-char", "1", "-1",
}, {
"sequence", "symbol", "none", "symbol-chars",
}, {
"chars", "open-paren", "alias", "(",
}, {
"chars", "close-paren", "alias", ")",
}, {
"sequence", "group", "alias", "open-paren", "wscs", "expression", "wscs", "close-paren",
}, {
"chars", "open-brace", "alias", "{",
}, {
"chars", "close-brace", "alias", "}",
}, {
"class", "digit", "alias", "0-9",
}, {
"quantifier", "number", "alias", "digit", "1", "-1",
}, {
"sequence", "count", "none", "number",
}, {
"sequence", "count-quantifier", "none", "open-brace", "wscs", "count", "wscs", "close-brace",
}, {
"sequence", "range-from", "none", "number",
}, {
"sequence", "range-to", "none", "number",
}, {
"chars", "comma", "alias", ",",
}, {
"sequence",
"range-quantifier",
"none",
"open-brace",
"wscs",
"range-from",
"wscs",
"comma",
"wscs",
"range-to",
"close-brace",
}, {
"chars", "one-or-more", "none", "+",
}, {
"chars", "zero-or-more", "none", "*",
}, {
"chars", "zero-or-one", "none", "?",
}, {
"choice",
"quantity",
"alias",
"count-quantifier",
"range-quantifier",
"one-or-more",
"zero-or-more",
"zero-or-one",
}, {
"choice", "quantifiable", "alias", "terminal", "symbol", "group",
}, {
"sequence", "quantifier", "none", "quantifiable", "wscs", "quantity",
}, {
"choice", "item", "alias", "terminal", "symbol", "group", "quantifier",
}, {
"sequence", "item-continue", "alias", "wscs", "item",
}, {
"quantifier", "items-continue", "alias", "item-continue", "0", "-1",
}, {
"sequence", "sequence", "none", "item", "items-continue",
}, {
"choice", "element", "alias", "terminal", "symbol", "group", "quantifier", "sequence",
}, {
"chars", "pipe", "alias", "|",
}, {
"sequence", "element-continue", "alias", "wscs", "pipe", "wscs", "element",
}, {
"quantifier", "elements-continue", "alias", "element-continue", "1", "-1",
}, {
"sequence", "choice", "none", "element", "elements-continue",
}, {
"choice",
"expression",
"alias",
"terminal",
"symbol",
"group",
"quantifier",
"sequence",
"choice",
}, {
"chars", "alias", "none", "alias",
}, {
"chars", "doc", "none", "doc",
}, {
"chars", "root", "none", "root",
}, {
"choice", "flag", "alias", "alias", "doc", "root",
}, {
"chars", "colon", "alias", ":",
}, {
"sequence", "flag-tag", "alias", "colon", "flag",
}, {
"quantifier", "flags", "alias", "flag-tag", "0", "-1",
}, {
"chars", "equal", "alias", "=",
}, {
"sequence", "definition", "none", "symbol", "flags", "wscs", "equal", "wscs", "expression",
}, {
"chars", "semicolon", "alias", ";",
}, {
"choice", "wsc-or-semicolon", "alias", "wsc", "semicolon",
}, {
"quantifier", "wsc-or-semicolons", "alias", "wsc-or-semicolon", "0", "-1",
}, {
"sequence",
"subsequent-definition",
"alias",
"wscs",
"semicolon",
"wsc-or-semicolons",
"definition",
}, {
"quantifier",
"subsequent-definitions",
"alias",
"subsequent-definition",
"0",
"-1",
}, {
"sequence",
"definitions",
"alias",
"definition",
"subsequent-definitions",
}, {
"quantifier",
"opt-definitions",
"alias",
"definitions",
"0",
"1",
}, {
"sequence",
"syntax",
"root",
"wsc-or-semicolons",
"opt-definitions",
"wsc-or-semicolons",
}}

cache.go (new normal file, 94 lines)

@@ -0,0 +1,94 @@
package parse
type cacheItem struct {
name string
node *Node
}
type tokenCache struct {
match []*cacheItem // TODO: potential optimization can be to use a balanced binary tree
noMatch []string
}
type cache struct {
tokens []*tokenCache // TODO: try with pointers, too
}
func (c *cache) get(offset int, name string) (*Node, bool, bool) {
if len(c.tokens) <= offset {
return nil, false, false
}
tc := c.tokens[offset]
if tc == nil {
return nil, false, false
}
for _, i := range tc.noMatch {
if i == name {
return nil, false, true
}
}
for _, i := range tc.match {
if i.name == name {
return i.node, true, true
}
}
return nil, false, false
}
func (c *cache) setOne(offset int, name string, n *Node) {
}
func (c *cache) set(offset int, name string, n *Node) {
if len(c.tokens) <= offset {
if cap(c.tokens) > offset {
c.tokens = c.tokens[:offset+1]
} else {
c.tokens = c.tokens[:cap(c.tokens)]
for len(c.tokens) <= offset {
c.tokens = append(c.tokens, nil)
}
}
}
tc := c.tokens[offset]
if tc == nil {
tc = &tokenCache{}
c.tokens[offset] = tc
}
if n == nil {
for _, i := range tc.match {
if i.name == name {
return
}
}
for _, i := range tc.noMatch {
if i == name {
return
}
}
tc.noMatch = append(tc.noMatch, name)
return
}
for _, i := range tc.match {
if i.name == name {
if n.tokenLength() > i.node.tokenLength() {
i.node = n
}
return
}
}
tc.match = append(tc.match, &cacheItem{
name: name,
node: n,
})
}
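// A short illustration of the cache contract above (values are arbitrary):
//
//	c := &cache{}
//	c.set(0, "symbol", nil)                            // remember a failed match at offset 0
//	c.set(3, "symbol", newNode("symbol", None, 3, 4))  // remember a match at offset 3
//	n, match, ok := c.get(3, "symbol")                 // n is the cached node, match and ok are true
//	_, match, ok = c.get(0, "symbol")                  // match is false, ok is true
//	_, _, ok = c.get(7, "symbol")                      // ok is false: nothing cached at that offset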

char.go (new normal file, 108 lines)

@@ -0,0 +1,108 @@
package parse
type charParser struct {
name string
commit CommitType
any bool
not bool
chars []rune
ranges [][]rune
includedBy []parser
}
func newChar(
name string,
ct CommitType,
any, not bool,
chars []rune,
ranges [][]rune,
) *charParser {
return &charParser{
name: name,
commit: ct,
any: any,
not: not,
chars: chars,
ranges: ranges,
}
}
func (p *charParser) nodeName() string { return p.name }
func (p *charParser) parser(r *registry, path []string) (parser, error) {
if stringsContain(path, p.name) {
panic(errCannotIncludeParsers)
}
r.setParser(p)
return p, nil
}
func (p *charParser) commitType() CommitType {
return p.commit
}
func (p *charParser) setIncludedBy(i parser, path []string) {
if stringsContain(path, p.name) {
panic(errCannotIncludeParsers)
}
p.includedBy = append(p.includedBy, i)
}
func (p *charParser) cacheIncluded(*context, *Node) {
panic(errCannotIncludeParsers)
}
func (p *charParser) match(t rune) bool {
if p.any {
return true
}
for _, ci := range p.chars {
if ci == t {
return !p.not
}
}
for _, ri := range p.ranges {
if t >= ri[0] && t <= ri[1] {
return !p.not
}
}
return p.not
}
func (p *charParser) parse(t Trace, c *context) {
t = t.Extend(p.name)
t.Out1("parsing char", c.offset)
if p.commit&Documentation != 0 {
t.Out1("fail, doc")
c.fail(c.offset)
return
}
if m, ok := c.fromCache(p.name); ok {
t.Out1("found in cache, match:", m)
return
}
if tok, ok := c.token(); ok && p.match(tok) {
t.Out1("success", string(tok))
n := newNode(p.name, p.commit, c.offset, c.offset+1)
c.cache.set(c.offset, p.name, n)
for _, i := range p.includedBy {
i.cacheIncluded(c, n)
}
c.success(n)
return
} else {
t.Out1("fail", string(tok))
c.cache.set(c.offset, p.name, nil)
c.fail(c.offset)
return
}
}
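// An illustrative example of match (names and values are arbitrary):
//
//	digit := newChar("digit", None, false, false, nil, [][]rune{{'0', '9'}})
//	digit.match('5') // true: '5' falls into the 0-9 range
//	digit.match('x') // false: not in chars or ranges, and not is false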

choice.go (new normal file, 180 lines)

@@ -0,0 +1,180 @@
package parse
type choiceDefinition struct {
name string
commit CommitType
elements []string
}
type choiceParser struct {
name string
commit CommitType
elements []parser
including []parser
}
func newChoice(name string, ct CommitType, elements []string) *choiceDefinition {
return &choiceDefinition{
name: name,
commit: ct,
elements: elements,
}
}
func (d *choiceDefinition) nodeName() string { return d.name }
// could store and cache everything that it fulfils
func (d *choiceDefinition) parser(r *registry, path []string) (parser, error) {
p, ok := r.parser(d.name)
if ok {
return p, nil
}
cp := &choiceParser{
name: d.name,
commit: d.commit,
}
r.setParser(cp)
var elements []parser
path = append(path, d.name)
for _, e := range d.elements {
element, ok := r.parser(e)
if ok {
elements = append(elements, element)
element.setIncludedBy(cp, path)
continue
}
elementDefinition, ok := r.definition(e)
if !ok {
return nil, parserNotFound(e)
}
element, err := elementDefinition.parser(r, path)
if err != nil {
return nil, err
}
element.setIncludedBy(cp, path)
elements = append(elements, element)
}
cp.elements = elements
return cp, nil
}
func (d *choiceDefinition) commitType() CommitType {
return d.commit
}
func (p *choiceParser) nodeName() string { return p.name }
func (p *choiceParser) setIncludedBy(i parser, path []string) {
if stringsContain(path, p.name) {
return
}
p.including = append(p.including, i)
}
func (p *choiceParser) cacheIncluded(c *context, n *Node) {
if !c.excluded(n.from, p.name) {
return
}
nc := newNode(p.name, p.commit, n.from, n.to)
nc.append(n)
c.cache.set(nc.from, p.name, nc)
// maybe it is enough to cache only those that are on the path
for _, i := range p.including {
i.cacheIncluded(c, nc)
}
}
func (p *choiceParser) parse(t Trace, c *context) {
t = t.Extend(p.name)
t.Out1("parsing choice", c.offset)
if p.commit&Documentation != 0 {
t.Out1("fail, doc")
c.fail(c.offset)
return
}
if m, ok := c.fromCache(p.name); ok {
t.Out1("found in cache, match:", m)
return
}
if c.excluded(c.offset, p.name) {
t.Out1("excluded")
c.fail(c.offset)
return
}
c.exclude(c.offset, p.name)
defer c.include(c.offset, p.name)
node := newNode(p.name, p.commit, c.offset, c.offset)
var match bool
for {
elements := p.elements
var foundMatch bool
// TODO: this can be the entry point for a transformation that enables the
// processing of massive amounts of autogenerated rules in parallel in a
// continuously, dynamically cached way. E.g. teach a machine that learns
// everything from a public library.
t.Out2("elements again")
for len(elements) > 0 {
t.Out2("in the choice", c.offset, node.from, elements[0].nodeName())
elements[0].parse(t, c)
elements = elements[1:]
c.offset = node.from
if !c.match || match && c.node.tokenLength() <= node.tokenLength() {
t.Out2("skipping")
continue
}
t.Out2("appending", c.node.tokenLength(), node.tokenLength(),
"\"", string(c.tokens[node.from:node.to]), "\"",
"\"", string(c.tokens[c.node.from:c.node.to]), "\"",
c.node.Name,
)
match = true
foundMatch = true
// node.clear()
node = newNode(p.name, p.commit, c.offset, c.offset) // TODO: review caching conditions
node.append(c.node)
c.cache.set(node.from, p.name, node)
for _, i := range p.including {
i.cacheIncluded(c, node)
}
// TODO: a simple break here can force PEG-style "priority" choices
}
if !foundMatch {
break
}
}
if match {
t.Out1("choice, success")
t.Out2("choice done", node.nodeLength())
c.success(node)
return
}
t.Out1("fail")
c.cache.set(node.from, p.name, nil)
c.fail(node.from)
}

context.go (new normal file, 152 lines)

@@ -0,0 +1,152 @@
package parse
import (
"io"
"unicode"
)
type context struct {
reader io.RuneReader
offset int
readOffset int
readErr error
eof bool
cache *cache
tokens []rune
match bool
node *Node
isExcluded [][]string
}
func newContext(r io.RuneReader) *context {
return &context{
reader: r,
cache: &cache{},
}
}
func (c *context) read() bool {
if c.eof || c.readErr != nil {
return false
}
t, n, err := c.reader.ReadRune()
if err != nil {
if err == io.EOF {
if n == 0 {
c.eof = true
return false
}
} else {
c.readErr = err
return false
}
}
c.readOffset++
if t == unicode.ReplacementChar {
c.readErr = ErrInvalidCharacter
return false
}
c.tokens = append(c.tokens, t)
return true
}
func (c *context) token() (rune, bool) {
if c.offset == c.readOffset {
if !c.read() {
return 0, false
}
}
return c.tokens[c.offset], true
}
func (c *context) excluded(offset int, name string) bool {
if len(c.isExcluded) <= offset {
return false
}
return stringsContain(c.isExcluded[offset], name)
}
func (c *context) exclude(offset int, name string) {
if len(c.isExcluded) <= offset {
c.isExcluded = append(c.isExcluded, nil)
if cap(c.isExcluded) > offset {
c.isExcluded = c.isExcluded[:offset+1]
} else {
c.isExcluded = append(
c.isExcluded[:cap(c.isExcluded)],
make([][]string, offset+1-cap(c.isExcluded))...,
)
}
}
c.isExcluded[offset] = append(c.isExcluded[offset], name)
}
func (c *context) include(offset int, name string) {
if len(c.isExcluded) <= offset {
return
}
for i := len(c.isExcluded[offset]) - 1; i >= 0; i-- {
if c.isExcluded[offset][i] == name {
c.isExcluded[offset] = append(c.isExcluded[offset][:i], c.isExcluded[offset][i+1:]...)
}
}
}
func (c *context) fromCache(name string) (bool, bool) {
n, m, ok := c.cache.get(c.offset, name)
if !ok {
return false, false
}
if m {
c.success(n)
} else {
c.fail(c.offset)
}
return m, true
}
func (c *context) success(n *Node) {
c.node = n
c.offset = n.to
c.match = true
}
func (c *context) fail(offset int) {
c.offset = offset
c.match = false
}
func (c *context) finalize() error {
if c.node.to < c.readOffset {
return ErrUnexpectedCharacter
}
if !c.eof {
c.read()
if !c.eof {
if c.readErr != nil {
return c.readErr
}
return ErrUnexpectedCharacter
}
}
c.node.commit()
if c.node.commitType&Alias != 0 {
return nil
}
c.node.applyTokens(c.tokens)
return nil
}

define.go (new normal file, 274 lines)

@@ -0,0 +1,274 @@
package parse
import "strconv"
func runesContain(rs []rune, r rune) bool {
for _, ri := range rs {
if ri == r {
return true
}
}
return false
}
func unescapeChar(c rune) rune {
switch c {
case 'n':
return '\n'
case 't':
return '\t'
case 'b':
return '\b'
case 'f':
return '\f'
case 'r':
return '\r'
case 'v':
return '\v'
default:
return c
}
}
func unescape(escape rune, banned []rune, chars []rune) ([]rune, error) {
var (
unescaped []rune
escaped bool
)
for _, ci := range chars {
if escaped {
unescaped = append(unescaped, unescapeChar(ci))
escaped = false
continue
}
switch {
case ci == escape:
escaped = true
case runesContain(banned, ci):
return nil, ErrInvalidCharacter
default:
unescaped = append(unescaped, ci)
}
}
if escaped {
return nil, ErrInvalidCharacter
}
return unescaped, nil
}
func dropComments(n *Node) *Node {
ncc := *n
nc := &ncc
nc.Nodes = nil
for _, ni := range n.Nodes {
if ni.Name == "comment" {
continue
}
nc.Nodes = append(nc.Nodes, dropComments(ni))
}
return nc
}
func flagsToCommitType(n []*Node) CommitType {
var ct CommitType
for _, ni := range n {
switch ni.Name {
case "alias":
ct |= Alias
case "doc":
ct |= Documentation
case "root":
ct |= Root
}
}
return ct
}
func toRune(c string) rune {
return []rune(c)[0]
}
func nodeChar(n *Node) rune {
s := n.Text()
if s[0] == '\\' {
return unescapeChar(toRune(s[1:]))
}
return toRune(s)
}
func defineMembers(s *Syntax, name string, n ...*Node) ([]string, error) {
var refs []string
for i, ni := range n {
nmi := childName(name, i)
switch ni.Name {
case "symbol":
refs = append(refs, ni.Text())
default:
refs = append(refs, nmi)
if err := defineExpression(s, nmi, Alias, ni); err != nil {
return nil, err
}
}
}
return refs, nil
}
func defineClass(s *Syntax, name string, ct CommitType, n []*Node) error {
var (
not bool
chars []rune
ranges [][]rune
)
if len(n) > 0 && n[0].Name == "class-not" {
not, n = true, n[1:]
}
for _, c := range n {
switch c.Name {
case "class-char":
chars = append(chars, nodeChar(c))
case "char-range":
ranges = append(ranges, []rune{nodeChar(c.Nodes[0]), nodeChar(c.Nodes[1])})
}
}
return s.Class(name, ct, not, chars, ranges)
}
func defineCharSequence(s *Syntax, name string, ct CommitType, charNodes []*Node) error {
var chars []rune
for _, ci := range charNodes {
chars = append(chars, nodeChar(ci))
}
return s.CharSequence(name, ct, chars)
}
func defineQuantifier(s *Syntax, name string, ct CommitType, n *Node, q *Node) error {
refs, err := defineMembers(s, name, n)
if err != nil {
return err
}
var min, max int
switch q.Name {
case "count-quantifier":
min, err = strconv.Atoi(q.Nodes[0].Text())
if err != nil {
return err
}
max = min
case "range-quantifier":
min = 0
max = -1
for _, rq := range q.Nodes {
switch rq.Name {
case "range-from":
min, err = strconv.Atoi(rq.Text())
if err != nil {
return err
}
case "range-to":
max, err = strconv.Atoi(rq.Text())
if err != nil {
return err
}
default:
return ErrInvalidSyntax
}
}
case "one-or-more":
min, max = 1, -1
case "zero-or-more":
min, max = 0, -1
case "zero-or-one":
min, max = 0, 1
}
return s.Quantifier(name, ct, refs[0], min, max)
}
func defineSequence(s *Syntax, name string, ct CommitType, n ...*Node) error {
refs, err := defineMembers(s, name, n...)
if err != nil {
return err
}
// // TODO: try to make this expressed in the syntax (maybe as sequences need either a quantifier or not
// // one item? or by maintaining the excluded and caching in the sequence in a similar way when there is
// // only one item?) how does this affect the quantifiers?
// if len(refs) == 1 {
// return s.Choice(name, ct, refs[0])
// }
return s.Sequence(name, ct, refs...)
}
func defineChoice(s *Syntax, name string, ct CommitType, n ...*Node) error {
refs, err := defineMembers(s, name, n...)
if err != nil {
return err
}
return s.Choice(name, ct, refs...)
}
func defineExpression(s *Syntax, name string, ct CommitType, expression *Node) error {
var err error
switch expression.Name {
case "any-char":
err = s.AnyChar(name, ct)
case "char-class":
err = defineClass(s, name, ct, expression.Nodes)
case "char-sequence":
err = defineCharSequence(s, name, ct, expression.Nodes)
case "symbol":
err = defineSequence(s, name, ct, expression)
case "quantifier":
err = defineQuantifier(s, name, ct, expression.Nodes[0], expression.Nodes[1])
case "sequence":
err = defineSequence(s, name, ct, expression.Nodes...)
case "choice":
err = defineChoice(s, name, ct, expression.Nodes...)
}
return err
}
func defineDefinition(s *Syntax, n *Node) error {
return defineExpression(
s,
n.Nodes[0].Text(),
flagsToCommitType(n.Nodes[1:len(n.Nodes)-1]),
n.Nodes[len(n.Nodes)-1],
)
}
func define(s *Syntax, n *Node) error {
if n.Name != "syntax" {
return ErrInvalidSyntax
}
n = dropComments(n)
for _, ni := range n.Nodes {
if err := defineDefinition(s, ni); err != nil {
return err
}
}
return nil
}
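// Worked examples for the unescaping helpers above (illustrative only):
//
//	unescapeChar('n')                             // '\n'
//	unescapeChar('x')                             // 'x': unknown escapes map to themselves
//	unescape('\\', []rune{'"'}, []rune(`a\"b\n`)) // []rune("a\"b\n"), nil
//	unescape('\\', []rune{'"'}, []rune(`a"b`))    // nil, ErrInvalidCharacter: unescaped banned char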

eskip.p (new normal file, 57 lines)

@@ -0,0 +1,57 @@
/*
Eskip routing configuration format for Skipper: https://github.com/zalando/skipper
*/
// TODO: definition with comment, doc = comment, or just replace comment
eskip:root = (expression | definitions)?;
comment-line:alias = "//" [^\n]*;
space:alias = [ \b\f\r\t\v];
comment:alias = comment-line (space* "\n" space* comment-line)*;
wsc:alias = [ \b\f\n\r\t\v] | comment;
decimal-digit:alias = [0-9];
octal-digit:alias = [0-7];
hexa-digit:alias = [0-9a-fA-F];
decimal:alias = [1-9] decimal-digit*;
octal:alias = "0" octal-digit*;
hexa:alias = "0" [xX] hexa-digit+;
int = decimal | octal | hexa;
exponent:alias = [eE] [+\-]? decimal-digit+;
float = decimal-digit+ "." decimal-digit* exponent?
| "." decimal-digit+ exponent?
| decimal-digit+ exponent;
number:alias = "-"? (int | float);
string = "\"" ([^\\"] | "\\" .)* "\"";
regexp = "/" ([^\\/] | "\\" .)* "/";
symbol = [a-zA-Z_] [a-zA-Z0-9_]*;
arg:alias = number | string | regexp;
args:alias = arg (wsc* "," wsc* arg)*;
term:alias = symbol wsc* "(" wsc* args? wsc* ")";
predicate = term;
predicates:alias = "*" | predicate (wsc* "&&" wsc* predicate)*;
filter = term;
filters:alias = filter (wsc* "->" wsc* filter)*;
address:alias = string;
shunt = "<shunt>";
loopback = "<loopback>";
backend:alias = address | shunt | loopback;
expression = predicates (wsc* "->" wsc* filters)? wsc* "->" wsc* backend;
id:alias = symbol;
definition = id wsc* ":" wsc* expression;
free-sep:alias = (wsc | ";");
sep:alias = wsc* ";" free-sep*;
definitions:alias = free-sep* definition (sep definition)* free-sep*;
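// An illustrative document accepted by this grammar; Path and Method are
// predicate names known from Skipper, someFilter is an arbitrary example:
//
// hello: Path("/hello") && Method("GET")
//     -> someFilter(3.14, /[a-z]+/, "arg")
//     -> "https://www.example.org";
// catchall: * -> <shunt>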

eskip_test.go (new normal file, 749 lines)

@@ -0,0 +1,749 @@
package parse
import (
"bytes"
"errors"
"fmt"
"math/rand"
"strconv"
"strings"
"testing"
"github.com/zalando/skipper/eskip"
)
const (
maxID = 27
meanID = 9
setPathChance = 0.72
maxPathTags = 12
meanPathTags = 2
maxPathTag = 24
meanPathTag = 9
setHostChance = 0.5
maxHost = 48
meanHost = 24
setPathRegexpChance = 0.45
maxPathRegexp = 36
meanPathRegexp = 12
setMethodChance = 0.1
setHeadersChance = 0.3
maxHeadersLength = 6
meanHeadersLength = 1
maxHeaderKeyLength = 18
meanHeaderKeyLength = 12
maxHeaderValueLength = 48
meanHeaderValueLength = 6
setHeaderRegexpChance = 0.05
maxHeaderRegexpsLength = 3
meanHeaderRegexpsLength = 1
maxHeaderRegexpLength = 12
meanHeaderRegexpLength = 6
maxTermNameLength = 15
meanTermNameLength = 6
maxTermArgsLength = 6
meanTermArgsLength = 1
floatArgChance = 0.1
intArgChance = 0.3
maxTermStringLength = 24
meanTermStringLength = 6
maxPredicatesLength = 4
meanPredicatesLength = 1
maxFiltersLength = 18
meanFiltersLength = 3
loopBackendChance = 0.05
shuntBackendChance = 0.1
maxBackend = 48
meanBackend = 15
)
func takeChance(c float64) bool {
return rand.Float64() < c
}
func generateID() string {
return generateString(maxID, meanID)
}
func generatePath() string {
if !takeChance(setPathChance) {
return ""
}
l := randomLength(maxPathTags, meanPathTags)
p := append(make([]string, 0, l+1), "")
for i := 0; i < l; i++ {
p = append(p, generateString(maxPathTag, meanPathTag))
}
return strings.Join(p, "/")
}
func generateHostRegexps() []string {
if !takeChance(setHostChance) {
return nil
}
return []string{generateString(maxHost, meanHost)}
}
func generatePathRegexps() []string {
if !takeChance(setPathRegexpChance) {
return nil
}
return []string{generateString(maxPathRegexp, meanPathRegexp)}
}
func generateMethod() string {
if !takeChance(setMethodChance) {
return ""
}
methods := []string{"GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "PATCH"}
return methods[rand.Intn(len(methods))]
}
func generateHeaders() map[string]string {
if !takeChance(setHeadersChance) {
return nil
}
h := make(map[string]string)
for i := 0; i < randomLength(maxHeadersLength, meanHeadersLength); i++ {
h[generateString(maxHeaderKeyLength, meanHeaderKeyLength)] =
generateString(maxHeaderValueLength, meanHeaderValueLength)
}
return h
}
func generateHeaderRegexps() map[string][]string {
if !takeChance(setHeaderRegexpChance) {
return nil
}
h := make(map[string][]string)
for i := 0; i < randomLength(maxHeaderRegexpsLength, meanHeaderRegexpsLength); i++ {
k := generateString(maxHeaderKeyLength, meanHeaderKeyLength)
for i := 0; i < randomLength(maxHeaderRegexpLength, meanHeaderRegexpLength); i++ {
h[k] = append(h[k], generateString(maxHeaderValueLength, meanHeaderValueLength))
}
}
return h
}
func generateTerm() (string, []interface{}) {
n := generateString(maxTermNameLength, meanTermNameLength)
al := randomLength(maxTermArgsLength, meanTermArgsLength)
a := make([]interface{}, 0, al)
for i := 0; i < al; i++ {
at := rand.Float64()
switch {
case at < floatArgChance:
a = append(a, rand.NormFloat64())
case at < intArgChance:
a = append(a, rand.Int())
default:
a = append(a, generateString(maxTermStringLength, meanTermStringLength))
}
}
return n, a
}
func generatePredicates() []*eskip.Predicate {
l := randomLength(maxPredicatesLength, meanPredicatesLength)
p := make([]*eskip.Predicate, 0, l)
for i := 0; i < l; i++ {
pi := &eskip.Predicate{}
pi.Name, pi.Args = generateTerm()
p = append(p, pi)
}
return p
}
func generateFilters() []*eskip.Filter {
l := randomLength(maxFiltersLength, meanFiltersLength)
f := make([]*eskip.Filter, 0, l)
for i := 0; i < l; i++ {
fi := &eskip.Filter{}
fi.Name, fi.Args = generateTerm()
f = append(f, fi)
}
return f
}
func generateBackend() (eskip.BackendType, string) {
t := rand.Float64()
switch {
case t < loopBackendChance:
return eskip.LoopBackend, ""
case t < loopBackendChance+shuntBackendChance:
return eskip.ShuntBackend, ""
default:
return eskip.NetworkBackend, generateString(maxBackend, meanBackend)
}
}
func generateRoute() *eskip.Route {
r := &eskip.Route{}
r.Id = generateID()
r.Path = generatePath()
r.HostRegexps = generateHostRegexps()
r.PathRegexps = generatePathRegexps()
r.Method = generateMethod()
r.Headers = generateHeaders()
r.HeaderRegexps = generateHeaderRegexps()
r.Predicates = generatePredicates()
r.Filters = generateFilters()
r.BackendType, r.Backend = generateBackend()
return r
}
func generateEskip(l int) []*eskip.Route {
r := make([]*eskip.Route, 0, l)
for i := 0; i < l; i++ {
r = append(r, generateRoute())
}
return r
}
func parseEskipInt(s string) (int, error) {
i, err := strconv.ParseInt(s, 0, 64)
return int(i), err
}
func parseEskipFloat(s string) (float64, error) {
f, err := strconv.ParseFloat(s, 64)
return f, err
}
func unquote(s string, escapedChars string) (string, error) {
if len(s) < 2 {
return "", nil
}
b := make([]byte, 0, len(s)-2)
var escaped bool
for _, bi := range []byte(s[1 : len(s)-1]) {
if escaped {
switch bi {
case 'b':
bi = '\b'
case 'f':
bi = '\f'
case 'n':
bi = '\n'
case 'r':
bi = '\r'
case 't':
bi = '\t'
case 'v':
bi = '\v'
}
b = append(b, bi)
escaped = false
continue
}
for _, ec := range []byte(escapedChars) {
if ec == bi {
return "", errors.New("invalid quote")
}
}
if bi == '\\' {
escaped = true
continue
}
b = append(b, bi)
}
return string(b), nil
}
func unquoteString(s string) (string, error) {
return unquote(s, "\"")
}
func unquoteRegexp(s string) (string, error) {
return unquote(s, "/")
}
func nodeToArg(n *Node) (interface{}, error) {
switch n.Name {
case "int":
return parseEskipInt(n.Text())
case "float":
return parseEskipFloat(n.Text())
case "string":
return unquoteString(n.Text())
case "regexp":
return unquoteRegexp(n.Text())
default:
return nil, errors.New("invalid arg")
}
}
func nodeToTerm(n *Node) (string, []interface{}, error) {
if len(n.Nodes) < 1 || n.Nodes[0].Name != "symbol" {
return "", nil, errors.New("invalid term")
}
name := n.Nodes[0].Text()
var args []interface{}
for _, ni := range n.Nodes[1:] {
a, err := nodeToArg(ni)
if err != nil {
return "", nil, err
}
args = append(args, a)
}
return name, args, nil
}
func nodeToPredicate(r *eskip.Route, n *Node) error {
name, args, err := nodeToTerm(n)
if err != nil {
return err
}
switch name {
case "Path":
if len(args) != 1 {
return errors.New("invalid path predicate")
}
p, ok := args[0].(string)
if !ok {
return errors.New("invalid path predicate")
}
r.Path = p
case "Host":
if len(args) != 1 {
return errors.New("invalid host predicate")
}
h, ok := args[0].(string)
if !ok {
return errors.New("invalid host predicate")
}
r.HostRegexps = append(r.HostRegexps, h)
case "PathRegexp":
if len(args) != 1 {
return errors.New("invalid path regexp predicate")
}
p, ok := args[0].(string)
if !ok {
return errors.New("invalid path regexp predicate")
}
r.PathRegexps = append(r.PathRegexps, p)
case "Method":
if len(args) != 1 {
return errors.New("invalid method predicate")
}
m, ok := args[0].(string)
if !ok {
return errors.New("invalid method predicate")
}
r.Method = m
case "Header":
if len(args) != 2 {
return errors.New("invalid header predicate")
}
name, ok := args[0].(string)
if !ok {
return errors.New("invalid header predicate")
}
value, ok := args[1].(string)
if !ok {
return errors.New("invalid header predicate")
}
if r.Headers == nil {
r.Headers = make(map[string]string)
}
r.Headers[name] = value
case "HeaderRegexp":
if len(args) != 2 {
return errors.New("invalid header regexp predicate")
}
name, ok := args[0].(string)
if !ok {
return errors.New("invalid header regexp predicate")
}
value, ok := args[1].(string)
if !ok {
return errors.New("invalid header regexp predicate")
}
if r.HeaderRegexps == nil {
r.HeaderRegexps = make(map[string][]string)
}
r.HeaderRegexps[name] = append(r.HeaderRegexps[name], value)
default:
r.Predicates = append(r.Predicates, &eskip.Predicate{Name: name, Args: args})
}
return nil
}
func nodeToFilter(n *Node) (*eskip.Filter, error) {
name, args, err := nodeToTerm(n)
if err != nil {
return nil, err
}
return &eskip.Filter{Name: name, Args: args}, nil
}
func nodeToBackend(r *eskip.Route, n *Node) error {
switch n.Name {
case "string":
b, err := unquoteString(n.Text())
if err != nil {
return err
}
r.BackendType = eskip.NetworkBackend
r.Backend = b
case "shunt":
r.BackendType = eskip.ShuntBackend
case "loopback":
r.BackendType = eskip.LoopBackend
default:
return errors.New("invalid backend type")
}
return nil
}
func nodeToEskipDefinition(n *Node) (*eskip.Route, error) {
ns := n.Nodes
if len(ns) < 2 || len(ns[1].Nodes) == 0 {
return nil, fmt.Errorf("invalid definition length: %d", len(ns))
}
r := &eskip.Route{}
if ns[0].Name != "symbol" {
return nil, errors.New("invalid definition id")
}
r.Id, ns = ns[0].Text(), ns[1].Nodes
predicates:
for i, ni := range ns {
switch ni.Name {
case "predicate":
if err := nodeToPredicate(r, ni); err != nil {
return nil, err
}
case "filter", "string", "shunt", "loopback":
ns = ns[i:]
break predicates
default:
return nil, errors.New("invalid definition item among predicates")
}
}
filters:
for i, ni := range ns {
switch ni.Name {
case "filter":
f, err := nodeToFilter(ni)
if err != nil {
return nil, err
}
r.Filters = append(r.Filters, f)
case "string", "shunt", "loopback":
ns = ns[i:]
break filters
default:
return nil, errors.New("invalid definition item among filters")
}
}
if len(ns) != 1 {
return nil, fmt.Errorf("invalid definition backend, remaining definition length: %d, %s",
len(ns), n.Text())
}
if err := nodeToBackend(r, ns[0]); err != nil {
return nil, err
}
return r, nil
}
func treeToEskip(n []*Node) ([]*eskip.Route, error) {
r := make([]*eskip.Route, 0, len(n))
for _, ni := range n {
d, err := nodeToEskipDefinition(ni)
if err != nil {
return nil, err
}
r = append(r, d)
}
return r, nil
}
func checkTerm(t *testing.T, gotName, expectedName string, gotArgs, expectedArgs []interface{}) {
if gotName != expectedName {
t.Error("invalid term name")
return
}
// legacy bug support
for i := len(expectedArgs) - 1; i >= 0; i-- {
if _, ok := expectedArgs[i].(int); ok {
expectedArgs = append(expectedArgs[:i], expectedArgs[i+1:]...)
continue
}
if v, ok := expectedArgs[i].(float64); ok && v < 0 {
gotArgs = append(gotArgs[:i], gotArgs[i+1:]...)
expectedArgs = append(expectedArgs[:i], expectedArgs[i+1:]...)
}
}
if len(gotArgs) != len(expectedArgs) {
t.Error("invalid term args length", len(gotArgs), len(expectedArgs))
return
}
for i, a := range gotArgs {
if a != expectedArgs[i] {
t.Error("invalid term arg")
return
}
}
}
func checkPredicates(t *testing.T, got, expected *eskip.Route) {
if got.Path != expected.Path {
t.Error("invalid path")
return
}
if len(got.HostRegexps) != len(expected.HostRegexps) {
t.Error("invalid host length")
return
}
for i, h := range got.HostRegexps {
if h != expected.HostRegexps[i] {
t.Error("invalid host")
return
}
}
if len(got.PathRegexps) != len(expected.PathRegexps) {
t.Error("invalid path regexp length", len(got.PathRegexps), len(expected.PathRegexps))
return
}
for i, h := range got.PathRegexps {
if h != expected.PathRegexps[i] {
t.Error("invalid path regexp")
return
}
}
if got.Method != expected.Method {
t.Error("invalid method")
return
}
if len(got.Headers) != len(expected.Headers) {
t.Error("invalid headers length")
return
}
for n, h := range got.Headers {
he, ok := expected.Headers[n]
if !ok {
t.Error("invalid header name")
return
}
if he != h {
t.Error("invalid header")
return
}
}
if len(got.HeaderRegexps) != len(expected.HeaderRegexps) {
t.Error("invalid header regexp length")
return
}
for n, h := range got.HeaderRegexps {
he, ok := expected.HeaderRegexps[n]
if !ok {
t.Error("invalid header regexp name")
return
}
if len(h) != len(he) {
t.Error("invalid header regexp item length")
return
}
for i, hi := range h {
if hi != he[i] {
t.Error("invalid header regexp")
return
}
}
}
if len(got.Predicates) != len(expected.Predicates) {
t.Error("invalid predicates length")
return
}
for i, p := range got.Predicates {
checkTerm(
t,
p.Name, expected.Predicates[i].Name,
p.Args, expected.Predicates[i].Args,
)
if t.Failed() {
t.Log(p.Name, expected.Predicates[i].Name)
t.Log(p.Args, expected.Predicates[i].Args)
return
}
}
}
func checkFilters(t *testing.T, got, expected []*eskip.Filter) {
if len(got) != len(expected) {
t.Error("invalid filters length")
return
}
for i, f := range got {
checkTerm(
t,
f.Name, expected[i].Name,
f.Args, expected[i].Args,
)
if t.Failed() {
return
}
}
}
func checkBackend(t *testing.T, got, expected *eskip.Route) {
if got.BackendType != expected.BackendType {
t.Error("invalid backend type")
return
}
if got.Backend != expected.Backend {
t.Error("invalid backend")
return
}
}
func checkRoute(t *testing.T, got, expected *eskip.Route) {
if got.Id != expected.Id {
t.Error("invalid route id")
return
}
checkPredicates(t, got, expected)
if t.Failed() {
return
}
checkFilters(t, got.Filters, expected.Filters)
if t.Failed() {
return
}
checkBackend(t, got, expected)
}
func checkEskip(t *testing.T, got, expected []*eskip.Route) {
if len(got) != len(expected) {
t.Error("invalid length", len(got), len(expected))
return
}
for i, ri := range got {
checkRoute(t, ri, expected[i])
if t.Failed() {
t.Log(ri.String())
t.Log(expected[i].String())
return
}
}
}
func eskipTreeToEskip(n *Node) ([]*eskip.Route, error) {
return treeToEskip(n.Nodes)
}
func TestEskip(t *testing.T) {
r := generateEskip(1 << 9)
e := eskip.Print(true, r...)
b := bytes.NewBufferString(e)
s, err := testSyntax("eskip.p", 0)
if err != nil {
t.Error(err)
return
}
n, err := s.Parse(b)
if err != nil {
t.Error(err)
return
}
rback, err := eskipTreeToEskip(n)
if err != nil {
t.Error(err)
return
}
checkEskip(t, rback, r)
}

json.p (new normal file, 14 lines)

@@ -0,0 +1,14 @@
// JSON (http://www.json.org)
ws:alias = [ \b\f\n\r\t];
true = "true";
false = "false";
null = "null";
string = "\"" ([^\\"\b\f\n\r\t] | "\\" (["\\/bfnrt] | "u" [0-9a-f]{4}))* "\"";
number = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? [0-9]+)?;
entry = string ws* ":" ws* value;
object = "{" ws* (entry (ws* "," ws* entry)*)? ws* "}";
array = "[" ws* (value (ws* "," ws* value)*)? ws* "]";
value:alias = true | false | null | string | number | object | array;
json = value;
// TODO: value should be an alias but test it first like this
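// An illustrative input accepted by this grammar:
// {"name": "example", "values": [1, 2.5, true, null], "nested": {}}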

json_test.go (new normal file, 557 lines)

@@ -0,0 +1,557 @@
package parse
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"math"
"math/rand"
"testing"
)
type jsonValueType int
const (
jsonNone jsonValueType = iota
jsonTrue
jsonFalse
jsonNull
jsonString
jsonNumber
jsonObject
jsonArray
)
const (
maxStringLength = 64
meanStringLength = 18
maxKeyLength = 24
meanKeyLength = 6
maxObjectLength = 12
meanObjectLength = 6
maxArrayLength = 64
meanArrayLength = 8
)
func randomLength(max, mean int) int {
return int(rand.NormFloat64()*float64(max)/math.MaxFloat64 + float64(mean))
}
func generateString(max, mean int) string {
l := randomLength(max, mean)
b := make([]byte, l)
for i := range b {
b[i] = byte(rand.Intn(int('z')-int('a')+1)) + 'a'
}
return string(b)
}
func generateJSONString() string {
return generateString(maxStringLength, meanStringLength)
}
func generateJSONNumber() interface{} {
if rand.Intn(2) == 1 {
return rand.NormFloat64()
}
n := rand.Int()
if rand.Intn(2) == 0 {
return n
}
return -n
}
func generateKey() string {
return generateString(maxKeyLength, meanKeyLength)
}
func generateJSONObject(minDepth int) map[string]interface{} {
l := randomLength(maxObjectLength, meanObjectLength)
o := make(map[string]interface{})
for i := 0; i < l; i++ {
o[generateKey()] = generateJSON(0)
}
if minDepth > 0 {
o[generateKey()] = generateJSON(minDepth)
}
return o
}
func generateJSONArray(minDepth int) []interface{} {
l := randomLength(maxArrayLength, meanArrayLength)
a := make([]interface{}, l, l+1)
for i := 0; i < l; i++ {
a[i] = generateJSON(0)
}
if minDepth > 0 {
a = append(a, generateJSON(minDepth))
}
return a
}
func generateJSONObjectOrArray(minDepth int) interface{} {
if rand.Intn(2) == 0 {
return generateJSONObject(minDepth - 1)
}
return generateJSONArray(minDepth - 1)
}
func generateJSON(minDepth int) interface{} {
if minDepth > 0 {
return generateJSONObjectOrArray(minDepth)
}
switch jsonValueType(rand.Intn(int(jsonNumber)) + 1) {
case jsonTrue:
return true
case jsonFalse:
return false
case jsonNull:
return nil
case jsonString:
return generateJSONString()
case jsonNumber:
return generateJSONNumber()
default:
panic("invalid json type")
}
}
func unquoteJSONString(t string) (string, error) {
var s string
err := json.Unmarshal([]byte(t), &s)
return s, err
}
func parseJSONNumber(t string) (interface{}, error) {
n := json.Number(t)
if i, err := n.Int64(); err == nil {
return int(i), nil
}
return n.Float64()
}
func nodeToJSONObject(n *Node) (map[string]interface{}, error) {
o := make(map[string]interface{})
for _, ni := range n.Nodes {
if len(ni.Nodes) != 2 {
return nil, errors.New("invalid json object")
}
key, err := unquoteJSONString(ni.Nodes[0].Text())
if err != nil {
return nil, err
}
val, err := treeToJSON(ni.Nodes[1])
if err != nil {
return nil, err
}
o[key] = val
}
return o, nil
}
func nodeToJSONArray(n *Node) ([]interface{}, error) {
a := make([]interface{}, 0, len(n.Nodes))
for _, ni := range n.Nodes {
item, err := treeToJSON(ni)
if err != nil {
return nil, err
}
a = append(a, item)
}
return a, nil
}
func treeToJSON(n *Node) (interface{}, error) {
switch n.Name {
case "true":
return true, nil
case "false":
return false, nil
case "null":
return nil, nil
case "string":
return unquoteJSONString(n.Text())
case "number":
return parseJSONNumber(n.Text())
case "object":
return nodeToJSONObject(n)
case "array":
return nodeToJSONArray(n)
default:
return nil, fmt.Errorf("invalid json node name: %s", n.Name)
}
}
func checkJSON(t *testing.T, got, expected interface{}) {
if expected == nil {
if got != nil {
t.Error("expected nil", got)
}
return
}
switch v := expected.(type) {
case bool:
if v != got.(bool) {
t.Error("expected bool", got)
}
case string:
if v != got.(string) {
t.Error("expected string", got)
}
case int:
if v != got.(int) {
t.Error("expected int", got)
}
case float64:
if v != got.(float64) {
t.Error("expected float64", got)
}
case map[string]interface{}:
o, ok := got.(map[string]interface{})
if !ok {
t.Error("expected object", got)
return
}
if len(v) != len(o) {
t.Error("invalid object length, expected: %d, got: %d", len(v), len(o))
return
}
for key, val := range v {
gotVal, ok := o[key]
if !ok {
t.Error("expected key not found: %s", key)
return
}
checkJSON(t, gotVal, val)
if t.Failed() {
return
}
}
case []interface{}:
a, ok := got.([]interface{})
if !ok {
t.Error("expected array", got)
}
if len(v) != len(a) {
t.Error("invalid array length, expected: %d, got: %d", len(v), len(a))
return
}
for i := range v {
checkJSON(t, a[i], v[i])
if t.Failed() {
return
}
}
default:
t.Error("unexpected parsed type", v)
}
}
func jsonTreeToJSON(n *Node) (interface{}, error) {
if n.Name != "json" {
return nil, fmt.Errorf("invalid root node name: %s", n.Name)
}
if len(n.Nodes) != 1 {
return nil, fmt.Errorf("invalid root node length: %d", len(n.Nodes))
}
return treeToJSON(n.Nodes[0])
}
func TestJSON(t *testing.T) {
test(t, "json.p", "value", []testItem{{
msg: "true",
text: "true",
node: &Node{
Name: "json",
Nodes: []*Node{{
Name: "true",
}},
},
ignorePosition: true,
}, {
msg: "false",
text: "false",
node: &Node{
Name: "json",
Nodes: []*Node{{
Name: "false",
}},
},
ignorePosition: true,
}, {
msg: "null",
text: "null",
node: &Node{
Name: "json",
Nodes: []*Node{{
Name: "null",
}},
},
ignorePosition: true,
}, {
msg: "string",
text: `"\"\\n\b\t\uabcd"`,
node: &Node{
Name: "json",
Nodes: []*Node{{
Name: "string",
}},
},
ignorePosition: true,
}, {
msg: "number",
text: "6.62e-34",
node: &Node{
Name: "json",
Nodes: []*Node{{
Name: "number",
}},
},
ignorePosition: true,
}, {
msg: "object",
text: `{
"true": true,
"false": false,
"null": null,
"string": "string",
"number": 42,
"object": {},
"array": []
}`,
node: &Node{
Name: "json",
Nodes: []*Node{{
Name: "object",
Nodes: []*Node{{
Name: "entry",
Nodes: []*Node{{
Name: "string",
}, {
Name: "true",
}},
}, {
Name: "entry",
Nodes: []*Node{{
Name: "string",
}, {
Name: "false",
}},
}, {
Name: "entry",
Nodes: []*Node{{
Name: "string",
}, {
Name: "null",
}},
}, {
Name: "entry",
Nodes: []*Node{{
Name: "string",
}, {
Name: "string",
}},
}, {
Name: "entry",
Nodes: []*Node{{
Name: "string",
}, {
Name: "number",
}},
}, {
Name: "entry",
Nodes: []*Node{{
Name: "string",
}, {
Name: "object",
}},
}, {
Name: "entry",
Nodes: []*Node{{
Name: "string",
}, {
Name: "array",
}},
}},
}},
},
ignorePosition: true,
}, {
msg: "array",
text: `[true, false, null, "string", 42, {
"true": true,
"false": false,
"null": null,
"string": "string",
"number": 42,
"object": {},
"array": []
}, []]`,
node: &Node{
Name: "json",
Nodes: []*Node{{
Name: "array",
Nodes: []*Node{{
Name: "true",
}, {
Name: "false",
}, {
Name: "null",
}, {
Name: "string",
}, {
Name: "number",
}, {
Name: "object",
Nodes: []*Node{{
Name: "entry",
Nodes: []*Node{{
Name: "string",
}, {
Name: "true",
}},
}, {
Name: "entry",
Nodes: []*Node{{
Name: "string",
}, {
Name: "false",
}},
}, {
Name: "entry",
Nodes: []*Node{{
Name: "string",
}, {
Name: "null",
}},
}, {
Name: "entry",
Nodes: []*Node{{
Name: "string",
}, {
Name: "string",
}},
}, {
Name: "entry",
Nodes: []*Node{{
Name: "string",
}, {
Name: "number",
}},
}, {
Name: "entry",
Nodes: []*Node{{
Name: "string",
}, {
Name: "object",
}},
}, {
Name: "entry",
Nodes: []*Node{{
Name: "string",
}, {
Name: "array",
}},
}},
}, {
Name: "array",
}},
}},
},
ignorePosition: true,
}, {
msg: "bugfix, 100",
text: "100",
node: &Node{
Name: "json",
Nodes: []*Node{{
Name: "number",
}},
},
ignorePosition: true,
}})
}
func TestRandomJSON(t *testing.T) {
j := generateJSON(48)
b, err := json.Marshal(j)
if err != nil {
t.Error(err)
return
}
buf := bytes.NewBuffer(b)
s, err := testSyntax("json.p", 0)
if err != nil {
t.Error(err)
return
}
testParse := func(t *testing.T, buf io.Reader) {
n, err := s.Parse(buf)
if err != nil {
t.Error(err)
return
}
jback, err := jsonTreeToJSON(n)
if err != nil {
t.Error(err)
return
}
checkJSON(t, jback, j)
}
t.Run("unindented", func(t *testing.T) {
testParse(t, buf)
})
indented := bytes.NewBuffer(nil)
if err := json.Indent(indented, b, "", " "); err != nil {
t.Error(err)
return
}
t.Run("indented", func(t *testing.T) {
testParse(t, indented)
})
indentedTabs := bytes.NewBuffer(nil)
if err := json.Indent(indentedTabs, b, "", "\t"); err != nil {
t.Error(err)
return
}
t.Run("indented with tabs", func(t *testing.T) {
testParse(t, indentedTabs)
})
}

keyval.p (new normal file, 29 lines)

@@ -0,0 +1,29 @@
ws:alias = [ \b\f\r\t\v];
wsnl:alias = ws | "\n";
comment-line:alias = "#" [^\n]*;
comment = comment-line (ws* "\n" ws* comment-line)*;
wsc:alias = ws | comment-line;
wsnlc:alias = wsnl | comment-line;
quoted:alias = "\"" ([^\\"] | "\\" .)* "\"";
symbol-non-ws:alias = ([^\\"\n=#.\[\] \b\f\r\t\v] | "\\" .)+;
symbol = symbol-non-ws (ws* symbol-non-ws)* | quoted;
key-form:alias = symbol (ws* "." ws* symbol)*;
key = key-form;
group-key = (comment "\n" ws*)? "[" ws* key-form ws* "]";
value-chars:alias = ([^\\"\n=# \b\f\r\t\v] | "\\" .)+;
value = value-chars (ws* value-chars)* | quoted;
key-val = (comment "\n" ws*)? (key | key? ws* "=" ws* value?);
entry:alias = group-key | key-val;
doc:root = (entry (ws* comment-line)? | wsnlc)*;
// TODO: not tested
// set as root for streaming:
single-entry = (entry (ws* comment-line)?
| wsnlc* entry (ws* comment-line)?)
[];
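// An illustrative document accepted by this grammar (taken from the full
// example exercised in keyval_test.go):
//
// # a keyval document
// key1 = foo
// key1.a = bar
// key2 = qux
// [foo.bar.baz]
// a = 1
// b = 2 # even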

keyval_test.go (new normal file, 394 lines)

@@ -0,0 +1,394 @@
package parse
import "testing"
func TestKeyVal(t *testing.T) {
test(t, "keyval.p", "doc", []testItem{{
msg: "empty",
}, {
msg: "a comment",
text: "# a comment",
}, {
msg: "a key",
text: "a key",
nodes: []*Node{{
Name: "key-val",
to: 5,
Nodes: []*Node{{
Name: "key",
to: 5,
Nodes: []*Node{{
Name: "symbol",
to: 5,
}},
}},
}},
}, {
msg: "a key with a preceeding whitespace",
text: " a key",
nodes: []*Node{{
Name: "key-val",
from: 1,
to: 6,
Nodes: []*Node{{
Name: "key",
from: 1,
to: 6,
Nodes: []*Node{{
Name: "symbol",
from: 1,
to: 6,
}},
}},
}},
}, {
msg: "a key and a comment",
text: `
# a comment
a key
`,
nodes: []*Node{{
Name: "key-val",
from: 20,
to: 25,
Nodes: []*Node{{
Name: "key",
from: 20,
to: 25,
Nodes: []*Node{{
Name: "symbol",
from: 20,
to: 25,
}},
}},
}},
}, {
msg: "a key value pair",
text: "a key = a value",
nodes: []*Node{{
Name: "key-val",
to: 15,
Nodes: []*Node{{
Name: "key",
to: 5,
Nodes: []*Node{{
Name: "symbol",
to: 5,
}},
}, {
Name: "value",
from: 8,
to: 15,
}},
}},
}, {
msg: "key value pairs with a comment at the end of line",
text: `
a key = a value # a comment
another key = another value # another comment
`,
nodes: []*Node{{
Name: "key-val",
from: 11,
to: 32,
Nodes: []*Node{{
Name: "key",
from: 11,
to: 16,
Nodes: []*Node{{
Name: "symbol",
from: 11,
to: 16,
}},
}, {
Name: "value",
from: 25,
to: 32,
}},
}, {
Name: "key-val",
from: 61,
to: 88,
Nodes: []*Node{{
Name: "key",
from: 61,
to: 72,
Nodes: []*Node{{
Name: "symbol",
from: 61,
to: 72,
}},
}, {
Name: "value",
from: 75,
to: 88,
}},
}},
}, {
msg: "value without a key",
text: "= a value",
nodes: []*Node{{
Name: "key-val",
to: 9,
Nodes: []*Node{{
Name: "value",
from: 2,
to: 9,
}},
}},
}, {
msg: "a key value pair with comment",
text: `
# a comment
a key = a value
`,
nodes: []*Node{{
Name: "key-val",
from: 4,
to: 34,
Nodes: []*Node{{
Name: "comment",
from: 4,
to: 15,
}, {
Name: "key",
from: 19,
to: 24,
Nodes: []*Node{{
Name: "symbol",
from: 19,
to: 24,
}},
}, {
Name: "value",
from: 27,
to: 34,
}},
}},
}, {
msg: "a key with multiple symbols",
text: "a key . with.multiple.symbols=a value",
nodes: []*Node{{
Name: "key-val",
to: 37,
Nodes: []*Node{{
Name: "key",
from: 0,
to: 29,
Nodes: []*Node{{
Name: "symbol",
from: 0,
to: 5,
}, {
Name: "symbol",
from: 8,
to: 12,
}, {
Name: "symbol",
from: 13,
to: 21,
}, {
Name: "symbol",
from: 22,
to: 29,
}},
}, {
Name: "value",
from: 30,
to: 37,
}},
}},
}, {
msg: "a group key",
text: `
# a comment
[a group key.empty]
`,
nodes: []*Node{{
Name: "group-key",
from: 4,
to: 38,
Nodes: []*Node{{
Name: "comment",
from: 4,
to: 15,
}, {
Name: "symbol",
from: 20,
to: 31,
}, {
Name: "symbol",
from: 32,
to: 37,
}},
}},
}, {
msg: "a group key with multiple values",
text: `
[foo.bar.baz]
= one
= two
= three
`,
nodes: []*Node{{
Name: "group-key",
Nodes: []*Node{{
Name: "symbol",
}, {
Name: "symbol",
}, {
Name: "symbol",
}},
}, {
Name: "key-val",
Nodes: []*Node{{
Name: "value",
}},
}, {
Name: "key-val",
Nodes: []*Node{{
Name: "value",
}},
}, {
Name: "key-val",
Nodes: []*Node{{
Name: "value",
}},
}},
ignorePosition: true,
}, {
msg: "a group key with multiple values, in a single line",
text: "[foo.bar.baz] = one = two = three",
nodes: []*Node{{
Name: "group-key",
Nodes: []*Node{{
Name: "symbol",
}, {
Name: "symbol",
}, {
Name: "symbol",
}},
}, {
Name: "key-val",
Nodes: []*Node{{
Name: "value",
}},
}, {
Name: "key-val",
Nodes: []*Node{{
Name: "value",
}},
}, {
Name: "key-val",
Nodes: []*Node{{
Name: "value",
}},
}},
ignorePosition: true,
}, {
msg: "full example",
text: `
# a keyval document
key1 = foo
key1.a = bar
key1.b = baz
key2 = qux
# foo bar baz values
[foo.bar.baz]
a = 1
b = 2 # even
c = 3
`,
nodes: []*Node{{
Name: "key-val",
Nodes: []*Node{{
Name: "key",
Nodes: []*Node{{
Name: "symbol",
}},
}, {
Name: "value",
}},
}, {
Name: "key-val",
Nodes: []*Node{{
Name: "key",
Nodes: []*Node{{
Name: "symbol",
}, {
Name: "symbol",
}},
}, {
Name: "value",
}},
}, {
Name: "key-val",
Nodes: []*Node{{
Name: "key",
Nodes: []*Node{{
Name: "symbol",
}, {
Name: "symbol",
}},
}, {
Name: "value",
}},
}, {
Name: "key-val",
Nodes: []*Node{{
Name: "key",
Nodes: []*Node{{
Name: "symbol",
}},
}, {
Name: "value",
}},
}, {
Name: "group-key",
Nodes: []*Node{{
Name: "comment",
}, {
Name: "symbol",
}, {
Name: "symbol",
}, {
Name: "symbol",
}},
}, {
Name: "key-val",
Nodes: []*Node{{
Name: "key",
Nodes: []*Node{{
Name: "symbol",
}},
}, {
Name: "value",
}},
}, {
Name: "key-val",
Nodes: []*Node{{
Name: "key",
Nodes: []*Node{{
Name: "symbol",
}},
}, {
Name: "value",
}},
}, {
Name: "key-val",
Nodes: []*Node{{
Name: "key",
Nodes: []*Node{{
Name: "symbol",
}},
}, {
Name: "value",
}},
}},
ignorePosition: true,
}})
}

mml.p (new normal file, 527 lines)

@@ -0,0 +1,527 @@
// whitespace is ignored, except for \n, which is ignored most of the
// time but can also serve as a separator in:
// - list
// - struct
// - function args
// - statements
// - list, struct and function type constraints
ws:alias = " " | "\b" | "\f" | "\r" | "\t" | "\v";
wsnl:alias = ws | "\n";
wsc:alias = ws | comment;
wsnlc:alias = wsc | "\n";
// comments can be line or block comments
line-comment-content = [^\n]*;
line-comment:alias = "//" line-comment-content;
block-comment-content = ([^*] | "*" [^/])*;
block-comment:alias = "/*" block-comment-content "*/";
comment-part:alias = line-comment | block-comment;
comment = comment-part (ws* "\n"? ws* comment-part)*;
decimal-digit:alias = [0-9];
octal-digit:alias = [0-7];
hexa-digit:alias = [0-9a-fA-F];
// integer examples: 42, 0666, 0xfff
decimal:alias = [1-9] decimal-digit*;
octal:alias = "0" octal-digit*;
hexa:alias = "0" [xX] hexa-digit+;
int = decimal | octal | hexa;
// float examples: .0, 0., 3.14, 1E-12
exponent:alias = [eE] [+\-]? decimal-digit+;
float = decimal-digit+ "." decimal-digit* exponent?
| "." decimal-digit+ exponent?
| decimal-digit+ exponent;
// string example: "Hello, world!"
// only \ and " need to be escaped, e.g. literal new lines are allowed
// common escape sequences get unescaped, the rest unescape to themselves
string = "\"" ([^\\"] | "\\" .)* "\"";
true = "true";
false = "false";
bool:alias = true | false;
// symbols normally can have only \w chars: fooBar_baz
// basic symbols cannot start with a digit
// some positions allow strings to be used as symbols, e.g: let "123" 123
// when this is not possible, dynamic symbols need to be used, but they are
// not allowed in every case, e.g: {symbol(foo()): "bar"}
// TODO: needs decision log for dynamic symbol
// TODO: exclude keywords
//
// dynamic symbol decision log:
// - every value is equatable
// - structs can act as hashtables (optimization is transparent)
// - in structs, must differentiate between symbol and value of a symbol when used as a key
// - js style [a] would be enough for the structs
// - the variables in a scope are like fields in a struct
// - [a] would be ambiguous with the list as an expression
// - a logical loophole is closed with symbol(a)
// - dynamic-symbols need to be handled differently in match expressions and type expressions
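// dynamic symbol example (illustrative sketch): indexing with a computed name
/*
let key = "foo"
a.symbol(key)
*/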
symbol = [a-zA-Z_][a-zA-Z_0-9]*;
static-symbol:alias = symbol | string;
dynamic-symbol = "symbol" wsc* "(" wsnlc* expression wsnlc* ")";
symbol-expression:alias = static-symbol | dynamic-symbol;
// TODO: what happens when a dynamic symbol gets exported?
// list items are separated by comma or new line (or both)
/*
[]
[a, b, c]
[
a
b
c
]
[1, 2, a..., [b, c], [d, [e]]...]
*/
spread-expression = primary-expression wsc* "...";
list-sep:alias = wsc* ("," | "\n") (wsnlc | ",")*;
list-item:alias = expression | spread-expression;
expression-list:alias = list-item (list-sep list-item)*;
// list example: [1, 2, 3]
// lists can be constructed with other lists: [l1..., l2...]
list-fact:alias = "[" (wsnlc | ",")* expression-list? (wsnlc | ",")* "]";
list = list-fact;
mutable-list = "~" wsnlc* list-fact;
indexer-symbol = "[" wsnlc* expression wsnlc* "]";
entry = (symbol-expression | indexer-symbol) wsnlc* ":" wsnlc* expression;
entry-list:alias = (entry | spread-expression) (list-sep (entry | spread-expression))*;
struct-fact:alias = "{" (wsnlc | ",")* entry-list? (wsnlc | ",")* "}";
struct = struct-fact;
mutable-struct = "~" wsnlc* struct-fact;
channel = "<>" | "<" wsnlc* int wsnlc* ">";
and-expression:doc = "and" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")";
or-expression:doc = "or" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")";
// TODO: use collect
argument-list:alias = static-symbol (list-sep static-symbol)*;
collect-symbol = "..." wsnlc* static-symbol;
function-fact:alias = "(" (wsnlc | ",")*
argument-list?
(wsnlc | ",")*
collect-symbol?
(wsnlc | ",")* ")" wsnlc*
expression;
function = "fn" wsnlc* function-fact; // can it ever cause a conflict with call and grouping?
effect = "fn" wsnlc* "~" wsnlc* function-fact;
/*
a[42]
a[3:9]
a[:9]
a[3:]
a[b][c][d]
a.foo
a."foo"
a.symbol(foo)
*/
range-from = expression;
range-to = expression;
range-expression:alias = range-from? wsnlc* ":" wsnlc* range-to?;
indexer-expression:alias = expression | range-expression;
expression-indexer:alias = primary-expression wsc* "[" wsnlc* indexer-expression wsnlc* "]";
symbol-indexer:alias = primary-expression wsnlc* "." wsnlc* symbol-expression; // TODO: test with a float on a new line
indexer = expression-indexer | symbol-indexer;
function-application = primary-expression wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")";
if = "if" wsnlc* expression wsnlc* block
(wsnlc* "else" wsnlc* "if" wsnlc* expression wsnlc* block)*
(wsnlc* "else" wsnlc* block)?;
default = "default" wsnlc* ":";
default-line:alias = default (wsnlc | ";")* statement?;
case = "case" wsnlc* expression wsnlc* ":";
case-line:alias = case (wsnlc | ";")* statement?;
switch = "switch" wsnlc* expression? wsnlc* "{" (wsnlc | ";")*
((case-line | default-line) (sep (case-line | default-line | statement))*)?
(wsnlc | ";")* "}";
// TODO: empty case not handled
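// conditional examples (illustrative sketch; foo, bar, baz are placeholder names):
/*
if a { foo(a) } else { bar(a) }
switch a {
case 1: foo(a)
case 2: bar(a)
default: baz(a)
}
*/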
int-type = "int";
float-type = "float";
string-type = "string";
bool-type = "bool";
error-type = "error";
primitive-type:alias = int-type
| float-type
| string-type
| bool-type
| error-type;
type-alias-name:alias = static-symbol;
static-range-from = int;
static-range-to = int;
static-range-expression:alias = static-range-from? wsnlc* ":" wsnlc* static-range-to?;
items-quantifier = int | static-range-expression;
// TODO: maybe this can be confusing with matching constants. Shall we support matching constants, values?
items-type = items-quantifier
| type-set (wsnlc* ":" wsnlc* items-quantifier)?
| static-symbol wsnlc* type-set (wsnlc* ":" wsnlc* items-quantifier)?;
destructure-item = type-set | static-symbol wsnlc* type-set;
collect-destructure-item = "..." wsnlc* destructure-item?
(wsnlc* ":" items-quantifier)?;
list-destructure-type = destructure-item
(list-sep destructure-item)*
(list-sep collect-destructure-item)?
| collect-destructure-item;
list-type-fact:alias = "[" (wsnlc | ",")*
(items-type | list-destructure-type)?
(wsnlc | ",")* "]";
list-type = list-type-fact;
mutable-list-type = "~" wsnlc* list-type-fact;
destructure-match-item = match-set
| static-symbol wsnlc* match-set
| static-symbol wsnlc* static-symbol wsnlc* match-set;
collect-destructure-match-item = "..." wsnlc* destructure-match-item?
(wsnlc* ":" items-quantifier)?;
list-destructure-match = destructure-match-item
(list-sep destructure-match-item)*
(list-sep collect-destructure-match-item)?
| collect-destructure-match-item;
list-match-fact:alias = "[" (wsnlc | ",")*
(list-destructure-match | items-type)?
(wsnlc | ",")* "]";
list-match = list-match-fact;
mutable-list-match = "~" wsnlc* list-match;
entry-type = static-symbol (wsnlc* ":" wsnlc* destructure-item)?;
entry-types:alias = entry-type (list-sep entry-type)*;
struct-type-fact:alias = "{" (wsnlc | ",")* entry-types? (wsnlc | ",")* "}";
struct-type = struct-type-fact;
mutable-struct-type = "~" wsnlc* struct-type-fact;
entry-match = static-symbol (wsnlc* ":" wsnlc* destructure-match-item)?;
entry-matches:alias = entry-match (list-sep entry-match)*;
struct-match-fact:alias = "{" (wsnlc | ",")* entry-matches? (wsnlc | ",")* "}";
struct-match = struct-match-fact;
mutable-struct-match = "~" wsnlc* struct-match-fact;
arg-type = type-set | static-symbol wsnlc* type-set;
args-type:alias = arg-type (list-sep arg-type)*;
function-type-fact:alias = "(" wsnlc* args-type? wsnlc* ")"
(wsc* (type-set | static-symbol wsc* type-set))?;
function-type = "fn" wsnlc* function-type-fact;
effect-type = "fn" wsnlc* "~" wsnlc* function-type-fact;
// TODO: heavy naming crime
receive-direction = "receive";
send-direction = "send";
channel-type = "<" wsnlc*
(receive-direction | send-direction)? wsnlc*
destructure-item?
wsnlc* ">";
type-fact-group:alias = "(" wsnlc* type-fact wsnlc* ")";
type-fact:alias = primitive-type
| type-alias-name
| list-type
| mutable-list-type
| struct-type
| mutable-struct-type
| function-type
| effect-type
| channel-type
| type-fact-group;
type-set:alias = type-fact (wsnlc* "|" wsnlc* type-fact)*;
type-expression:alias = type-set | static-symbol wsc* type-set;
match-fact:alias = list-match
| mutable-list-match
| struct-match
| mutable-struct-match;
match-set:alias = type-set | match-fact;
match-expression:alias = match-set | static-symbol wsc* match-set;
match-case = "case" wsnlc* match-expression wsnlc* ":";
match-case-line:alias = match-case (wsnlc | ";")* statement?;
match = "match" wsnlc* expression wsnlc* "{" (wsnlc | ";")*
((match-case-line | default-line)
(sep (match-case-line | default-line | statement))*)?
(wsnlc | ";")* "}";
conditional:alias = if
| switch
| match;
receive-call = "receive" wsc* "(" (wsnlc | ",")* expression (wsnlc | ",")* ")";
receive-op = "<-" wsc* primary-expression;
receive-expression-group:alias = "(" wsnlc* receive-expression wsnlc* ")";
receive-expression:alias = receive-call | receive-op | receive-expression-group;
receive-assign-capture:alias = assignable wsnlc* ("=" wsnlc*)? receive-expression;
receive-assignment = "set" wsnlc* receive-assign-capture;
receive-assignment-equal = assignable wsnlc* "=" wsnlc* receive-expression;
receive-capture:alias = symbol-expression wsnlc* ("=" wsnlc*)? receive-expression;
receive-definition = "let" wsnlc* receive-capture;
receive-mutable-definition = "let" wsnlc* "~" wsnlc* receive-capture;
receive-statement:alias = receive-assignment | receive-definition;
send-call:alias = "send" wsc* "(" (wsnlc | ",")* expression list-sep expression (wsnlc | ",")* ")";
send-op:alias = primary-expression wsc* "<-" wsc* expression;
send-call-group:alias = "(" wsnlc* send wsnlc* ")";
send = send-call | send-op | send-call-group;
close = "close" wsc* "(" (wsnlc | ",")* expression (wsnlc | ",")* ")";
communication-group:alias = "(" wsnlc* communication wsnlc* ")";
communication:alias = receive-expression | receive-statement | send | communication-group;
select-case = "case" wsnlc* communication wsnlc* ":";
select-case-line:alias = select-case (wsnlc | ";")* statement?;
select = "select" wsnlc* "{" (wsnlc | ";")*
((select-case-line | default-line)
(sep (select-case-line | default-line | statement))*)?
(wsnlc | ";")* "}";
go = "go" wsnlc* function-application;
/*
require . = "mml/foo"
require bar = "mml/foo"
require . "mml/foo"
require bar "mml/foo"
require "mml/foo"
require (
. = "mml/foo"
bar = "mml/foo"
. "mml/foo"
bar "mml/foo"
"mml/foo"
)
require ()
*/
require-inline = ".";
require-fact = string
| (static-symbol | require-inline) (wsnlc* "=")? wsnlc* string;
require-facts:alias = require-fact (list-sep require-fact)*;
require-statement:alias = "require" wsnlc* require-fact;
require-statement-group:alias = "require" wsc* "(" (wsnlc | ",")*
require-facts?
(wsnlc | ",")* ")";
require = require-statement | require-statement-group;
panic = "panic" wsc* "(" (wsnlc | ",")* expression (wsnlc | ",")* ")";
recover = "recover" wsc* "(" (wsnlc | ",")* ")";
block = "{" (wsnlc | ";")* statements? (wsnlc | ";")* "}";
expression-group:alias = "(" wsnlc* expression wsnlc* ")";
primary-expression:alias = int
| float
| string
| bool
| symbol
| dynamic-symbol
| list
| mutable-list
| struct
| mutable-struct
| channel
| and-expression // only documentation
| or-expression // only documentation
| function
| effect
| indexer
| function-application // pseudo-expression
| conditional // pseudo-expression
| receive-call
| select // pseudo-expression
| recover
| block // pseudo-expression
| expression-group;
plus = "+";
minus = "-";
logical-not = "!";
binary-not = "^";
unary-operator:alias = plus | minus | logical-not | binary-not;
unary-expression = unary-operator wsc* primary-expression | receive-op;
mul = "*";
div = "/";
mod = "%";
lshift = "<<";
rshift = ">>";
binary-and = "&";
and-not = "&^";
add = "+";
sub = "-";
binary-or = "|";
xor = "^";
eq = "==";
not-eq = "!=";
less = "<";
less-or-eq = "<=";
greater = ">";
greater-or-eq = ">=";
logical-and = "&&";
logical-or = "||";
chain = "->";
binary-op0:alias = mul | div | mod | lshift | rshift | binary-and | and-not;
binary-op1:alias = add | sub | binary-or | xor;
binary-op2:alias = eq | not-eq | less | less-or-eq | greater | greater-or-eq;
binary-op3:alias = logical-and;
binary-op4:alias = logical-or;
binary-op5:alias = chain;
operand0:alias = primary-expression | unary-expression;
operand1:alias = operand0 | binary0;
operand2:alias = operand1 | binary1;
operand3:alias = operand2 | binary2;
operand4:alias = operand3 | binary3;
operand5:alias = operand4 | binary4;
binary0 = operand0 wsc* binary-op0 wsc* operand0;
binary1 = operand1 wsc* binary-op1 wsc* operand1;
binary2 = operand2 wsc* binary-op2 wsc* operand2;
binary3 = operand3 wsc* binary-op3 wsc* operand3;
binary4 = operand4 wsc* binary-op4 wsc* operand4;
binary5 = operand5 wsc* binary-op5 wsc* operand5;
binary-expression:alias = binary0 | binary1 | binary2 | binary3 | binary4 | binary5;
ternary-expression = expression wsnlc* "?" wsnlc* expression wsnlc* ":" wsnlc* expression;
expression:alias = primary-expression
| unary-expression
| binary-expression
| ternary-expression;
// TODO: code()
// TODO: observability
break = "break";
continue = "continue";
loop-control:alias = break | continue;
in-expression = static-symbol wsnlc* "in" wsnlc* (expression | range-expression);
loop-expression = expression | in-expression;
loop = "for" wsnlc* (block | loop-expression wsnlc* block);
/*
a = b
set c = d
set e f
set (
g = h
i j
)
*/
assignable:alias = symbol-expression | indexer;
assign-capture = assignable wsnlc* ("=" wsnlc*)? expression;
assign-set:alias = "set" wsnlc* assign-capture;
assign-equal = assignable wsnlc* "=" wsnlc* expression;
assign-captures:alias = assign-capture (list-sep assign-capture)*;
assign-group:alias = "set" wsnlc* "(" (wsnlc | ",")* assign-captures? (wsnlc | ",")* ")";
assignment = assign-set | assign-equal | assign-group;
/*
let a = b
let c d
let ~ e = f
let ~ g h
let (
i = j
k l
~ m = n
~ o p
)
let ~ (
q = r
s t
)
*/
value-capture-fact:alias = symbol-expression wsnlc* ("=" wsnlc*)? expression;
value-capture = value-capture-fact;
mutable-capture = "~" wsnlc* value-capture-fact;
value-definition = "let" wsnlc* (value-capture | mutable-capture);
value-captures:alias = value-capture (list-sep value-capture)*;
mixed-captures:alias = (value-capture | mutable-capture) (list-sep (value-capture | mutable-capture))*;
value-definition-group = "let" wsnlc* "(" (wsnlc | ",")* mixed-captures? (wsnlc | ",")* ")";
mutable-definition-group = "let" wsnlc* "~" wsnlc* "(" (wsnlc | ",")* value-captures? (wsnlc | ",")* ")";
/*
fn a() b
fn ~ c() d
fn (
e() f
~ g() h
)
fn ~ (
i()
j()
)
*/
function-definition-fact:alias = static-symbol wsnlc* function-fact;
function-capture = function-definition-fact;
effect-capture = "~" wsnlc* function-definition-fact;
function-definition = "fn" wsnlc* (function-capture | effect-capture);
function-captures:alias = function-capture (list-sep function-capture)*;
mixed-function-captures:alias = (function-capture | effect-capture)
(list-sep (function-capture | effect-capture))*;
function-definition-group = "fn" wsnlc* "(" (wsnlc | ",")*
mixed-function-captures?
(wsnlc | ",")* ")";
effect-definition-group = "fn" wsnlc* "~" wsnlc* "(" (wsnlc | ",")*
function-captures?
(wsnlc | ",")* ")";
definition:alias = value-definition
| value-definition-group
| mutable-definition-group
| function-definition
| function-definition-group
| effect-definition-group;
// TODO: cannot do:
// type alias a int|fn () string|error
// needs grouping of type-set
type-alias = "type" wsnlc* "alias" wsnlc* static-symbol wsnlc* type-set;
type-constraint = "type" wsnlc* static-symbol wsnlc* type-set;
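// type declaration examples (illustrative sketch):
/*
type alias id int|string
type point {x: float, y: float}
*/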
statement-group:alias = "(" wsnlc* statement wsnlc* ")";
statement:alias = send
| close
| panic
| require
| loop-control
| go
| loop
| assignment
| definition
| expression
| type-alias
| type-constraint
| statement-group;
shebang-command = [^\n]*;
shebang = "#!" shebang-command "\n";
sep:alias = wsc* (";" | "\n") (wsnlc | ";")*;
statements:alias = statement (sep statement)*;
mml:root = shebang? (wsnlc | ";")* statements? (wsnlc | ";")*;

2791
mml_test.go Normal file

File diff suppressed because it is too large Load Diff

740
next_test.go Normal file
View File

@ -0,0 +1,740 @@
package parse
import (
"bytes"
"io"
"os"
"testing"
"time"
)
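// testItem describes a single parser test case: text is parsed with the syntax
// under test; fail marks input that is expected to be rejected; node (or nodes,
// which get wrapped in a node named after the root) is the expected tree; and
// ignorePosition skips comparing the from/to offsets.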
type testItem struct {
msg string
text string
fail bool
node *Node
nodes []*Node
ignorePosition bool
}
func testSyntaxReader(r io.Reader, traceLevel int) (*Syntax, error) {
trace := NewTrace(0)
b, err := bootSyntax(trace)
if err != nil {
return nil, err
}
doc, err := b.Parse(r)
if err != nil {
return nil, err
}
trace = NewTrace(traceLevel)
s := NewSyntax(trace)
if err := define(s, doc); err != nil {
return nil, err
}
if err := s.Init(); err != nil {
return nil, err
}
return s, nil
}
func testSyntaxString(s string, traceLevel int) (*Syntax, error) {
return testSyntaxReader(bytes.NewBufferString(s), traceLevel)
}
func testSyntax(file string, traceLevel int) (*Syntax, error) {
f, err := os.Open(file)
if err != nil {
return nil, err
}
defer f.Close()
return testSyntaxReader(f, traceLevel)
}
func checkNodesPosition(t *testing.T, left, right []*Node, position bool) {
if len(left) != len(right) {
t.Error("length doesn't match", len(left), len(right))
return
}
for len(left) > 0 {
checkNodePosition(t, left[0], right[0], position)
if t.Failed() {
return
}
left, right = left[1:], right[1:]
}
}
func checkNodePosition(t *testing.T, left, right *Node, position bool) {
if (left == nil) != (right == nil) {
t.Error("nil reference doesn't match", left == nil, right == nil)
return
}
if left == nil {
return
}
if left.Name != right.Name {
t.Error("name doesn't match", left.Name, right.Name)
return
}
if position && left.from != right.from {
t.Error("from doesn't match", left.Name, left.from, right.from)
return
}
if position && left.to != right.to {
t.Error("to doesn't match", left.Name, left.to, right.to)
return
}
if len(left.Nodes) != len(right.Nodes) {
t.Error("length doesn't match", left.Name, len(left.Nodes), len(right.Nodes))
t.Log(left)
t.Log(right)
for {
if len(left.Nodes) > 0 {
t.Log("<", left.Nodes[0])
left.Nodes = left.Nodes[1:]
}
if len(right.Nodes) > 0 {
t.Log(">", right.Nodes[0])
right.Nodes = right.Nodes[1:]
}
if len(left.Nodes) == 0 && len(right.Nodes) == 0 {
break
}
}
return
}
checkNodesPosition(t, left.Nodes, right.Nodes, position)
}
func checkNodes(t *testing.T, left, right []*Node) {
checkNodesPosition(t, left, right, true)
}
func checkNode(t *testing.T, left, right *Node) {
checkNodePosition(t, left, right, true)
}
func checkNodesIgnorePosition(t *testing.T, left, right []*Node) {
checkNodesPosition(t, left, right, false)
}
func checkNodeIgnorePosition(t *testing.T, left, right *Node) {
checkNodePosition(t, left, right, false)
}
func testReaderTrace(t *testing.T, r io.Reader, rootName string, traceLevel int, tests []testItem) {
s, err := testSyntaxReader(r, traceLevel)
if err != nil {
t.Error(err)
return
}
start := time.Now()
defer func() { t.Log("\ntotal duration", time.Since(start)) }()
for _, ti := range tests {
t.Run(ti.msg, func(t *testing.T) {
n, err := s.Parse(bytes.NewBufferString(ti.text))
if ti.fail && err == nil {
t.Error("failed to fail")
return
} else if !ti.fail && err != nil {
t.Error(err)
return
} else if ti.fail {
return
}
t.Log(n)
cn := checkNode
if ti.ignorePosition {
cn = checkNodeIgnorePosition
}
if ti.node != nil {
cn(t, n, ti.node)
} else {
cn(t, n, &Node{
Name: rootName,
from: 0,
to: len(ti.text),
Nodes: ti.nodes,
})
}
})
}
}
func testStringTrace(t *testing.T, s string, traceLevel int, tests []testItem) {
testReaderTrace(t, bytes.NewBufferString(s), "", traceLevel, tests)
}
func testString(t *testing.T, s string, tests []testItem) {
testStringTrace(t, s, 0, tests)
}
func testTrace(t *testing.T, file, rootName string, traceLevel int, tests []testItem) {
f, err := os.Open(file)
if err != nil {
t.Error(err)
return
}
defer f.Close()
testReaderTrace(t, f, rootName, traceLevel, tests)
}
func test(t *testing.T, file, rootName string, tests []testItem) {
testTrace(t, file, rootName, 0, tests)
}
func TestRecursion(t *testing.T) {
testString(
t,
`A = "a" | A "a"`,
[]testItem{{
msg: "recursion in choice, right, left, commit",
text: "aaa",
node: &Node{
Name: "A",
Nodes: []*Node{{
Name: "A",
Nodes: []*Node{{
Name: "A",
}},
}},
},
ignorePosition: true,
}},
)
testString(
t,
`A = "a" | "a" A`,
[]testItem{{
msg: "recursion in choice, right, right, commit",
text: "aaa",
node: &Node{
Name: "A",
Nodes: []*Node{{
Name: "A",
Nodes: []*Node{{
Name: "A",
}},
}},
},
ignorePosition: true,
}},
)
testString(
t,
`A = "a" A | "a"`,
[]testItem{{
msg: "recursion in choice, left, right, commit",
text: "aaa",
node: &Node{
Name: "A",
Nodes: []*Node{{
Name: "A",
Nodes: []*Node{{
Name: "A",
}},
}},
},
ignorePosition: true,
}},
)
testString(
t,
`A = A "a" | "a"`,
[]testItem{{
msg: "recursion in choice, left, left, commit",
text: "aaa",
node: &Node{
Name: "A",
Nodes: []*Node{{
Name: "A",
Nodes: []*Node{{
Name: "A",
}},
}},
},
ignorePosition: true,
}},
)
testString(
t,
`A':alias = "a" | A' "a"; A = A'`,
[]testItem{{
msg: "recursion in choice, right, left, alias",
text: "aaa",
node: &Node{
Name: "A",
to: 3,
},
}},
)
testString(
t,
`A':alias = "a" | "a" A'; A = A'`,
[]testItem{{
msg: "recursion in choice, right, right, alias",
text: "aaa",
node: &Node{
Name: "A",
to: 3,
},
}},
)
testString(
t,
`A':alias = "a" A' | "a"; A = A'`,
[]testItem{{
msg: "recursion in choice, left, right, alias",
text: "aaa",
node: &Node{
Name: "A",
to: 3,
},
}},
)
testString(
t,
`A':alias = A' "a" | "a"; A = A'`,
[]testItem{{
msg: "recursion in choice, left, left, alias",
text: "aaa",
node: &Node{
Name: "A",
to: 3,
},
}},
)
}
func TestSequence(t *testing.T) {
testString(
t,
`AB = "a" | "a"? "a"? "b" "b"`,
[]testItem{{
msg: "sequence with optional items",
text: "abb",
node: &Node{
Name: "AB",
to: 3,
},
}, {
msg: "sequence with optional items, none",
text: "bb",
node: &Node{
Name: "AB",
to: 2,
},
}},
)
testString(
t,
`A = "a" | (A?)*`,
[]testItem{{
msg: "sequence in choice with redundant quantifier",
text: "aaa",
node: &Node{
Name: "A",
Nodes: []*Node{{
Name: "A",
}, {
Name: "A",
}, {
Name: "A",
}},
},
ignorePosition: true,
}},
)
testString(
t,
`A = ("a"*)*`,
[]testItem{{
msg: "sequence with redundant quantifier",
text: "aaa",
node: &Node{
Name: "A",
to: 3,
},
}},
)
}
func TestQuantifiers(t *testing.T) {
testString(
t,
`A = "a" "b"{0} "a"`,
[]testItem{{
msg: "zero",
text: "aa",
node: &Node{
Name: "A",
to: 2,
},
}, {
msg: "zero, fail",
text: "aba",
fail: true,
}},
)
testString(
t,
`A = "a" "b"{1} "a"`,
[]testItem{{
msg: "one, missing",
text: "aa",
fail: true,
}, {
msg: "one",
text: "aba",
node: &Node{
Name: "A",
to: 3,
},
}, {
msg: "one, too much",
text: "abba",
fail: true,
}},
)
testString(
t,
`A = "a" "b"{3} "a"`,
[]testItem{{
msg: "three, missing",
text: "abba",
fail: true,
}, {
msg: "three",
text: "abbba",
node: &Node{
Name: "A",
to: 5,
},
}, {
msg: "three, too much",
text: "abbbba",
fail: true,
}},
)
testString(
t,
`A = "a" "b"{0,1} "a"`,
[]testItem{{
msg: "zero or one explicit, missing",
text: "aa",
node: &Node{
Name: "A",
to: 2,
},
}, {
msg: "zero or one explicit",
text: "aba",
node: &Node{
Name: "A",
to: 3,
},
}, {
msg: "zero or one explicit, too much",
text: "abba",
fail: true,
}},
)
testString(
t,
`A = "a" "b"{,1} "a"`,
[]testItem{{
msg: "zero or one explicit, omit zero, missing",
text: "aa",
node: &Node{
Name: "A",
to: 2,
},
}, {
msg: "zero or one explicit, omit zero",
text: "aba",
node: &Node{
Name: "A",
to: 3,
},
}, {
msg: "zero or one explicit, omit zero, too much",
text: "abba",
fail: true,
}},
)
testString(
t,
`A = "a" "b"? "a"`,
[]testItem{{
msg: "zero or one explicit, shortcut, missing",
text: "aa",
node: &Node{
Name: "A",
to: 2,
},
}, {
msg: "zero or one explicit, shortcut",
text: "aba",
node: &Node{
Name: "A",
to: 3,
},
}, {
msg: "zero or one explicit, shortcut, too much",
text: "abba",
fail: true,
}},
)
testString(
t,
`A = "a" "b"{0,3} "a"`,
[]testItem{{
msg: "zero or three, missing",
text: "aa",
node: &Node{
Name: "A",
to: 2,
},
}, {
msg: "zero or three",
text: "abba",
node: &Node{
Name: "A",
to: 4,
},
}, {
msg: "zero or three",
text: "abbba",
node: &Node{
Name: "A",
to: 5,
},
}, {
msg: "zero or three, too much",
text: "abbbba",
fail: true,
}},
)
testString(
t,
`A = "a" "b"{,3} "a"`,
[]testItem{{
msg: "zero or three, omit zero, missing",
text: "aa",
node: &Node{
Name: "A",
to: 2,
},
}, {
msg: "zero or three, omit zero",
text: "abba",
node: &Node{
Name: "A",
to: 4,
},
}, {
msg: "zero or three, omit zero",
text: "abbba",
node: &Node{
Name: "A",
to: 5,
},
}, {
msg: "zero or three, omit zero, too much",
text: "abbbba",
fail: true,
}},
)
testString(
t,
`A = "a" "b"{1,3} "a"`,
[]testItem{{
msg: "one or three, missing",
text: "aa",
fail: true,
}, {
msg: "one or three",
text: "abba",
node: &Node{
Name: "A",
to: 4,
},
}, {
msg: "one or three",
text: "abbba",
node: &Node{
Name: "A",
to: 5,
},
}, {
msg: "one or three, too much",
text: "abbbba",
fail: true,
}},
)
testString(
t,
`A = "a" "b"{3,5} "a"`,
[]testItem{{
msg: "three or five, missing",
text: "abba",
fail: true,
}, {
msg: "three or five",
text: "abbbba",
node: &Node{
Name: "A",
to: 6,
},
}, {
msg: "three or five",
text: "abbbbba",
node: &Node{
Name: "A",
to: 7,
},
}, {
msg: "three or five, too much",
text: "abbbbbba",
fail: true,
}},
)
testStringTrace(
t,
`A = "a" "b"{0,} "a"`,
1,
[]testItem{{
msg: "zero or more, explicit, missing",
text: "aa",
node: &Node{
Name: "A",
to: 2,
},
}, {
msg: "zero or more, explicit",
text: "abba",
node: &Node{
Name: "A",
to: 4,
},
}},
)
testStringTrace(
t,
`A = "a" "b"* "a"`,
1,
[]testItem{{
msg: "zero or more, shortcut, missing",
text: "aa",
node: &Node{
Name: "A",
to: 2,
},
}, {
msg: "zero or more, shortcut",
text: "abba",
node: &Node{
Name: "A",
to: 4,
},
}},
)
testStringTrace(
t,
`A = "a" "b"{1,} "a"`,
1,
[]testItem{{
msg: "one or more, explicit, missing",
text: "aa",
fail: true,
}, {
msg: "one or more, explicit",
text: "abba",
node: &Node{
Name: "A",
to: 4,
},
}},
)
testStringTrace(
t,
`A = "a" "b"+ "a"`,
1,
[]testItem{{
msg: "one or more, shortcut, missing",
text: "aa",
fail: true,
}, {
msg: "one or more, shortcut",
text: "abba",
node: &Node{
Name: "A",
to: 4,
},
}},
)
testStringTrace(
t,
`A = "a" "b"{3,} "a"`,
1,
[]testItem{{
msg: "three or more, explicit, missing",
text: "abba",
fail: true,
}, {
msg: "three or more, explicit",
text: "abbbba",
node: &Node{
Name: "A",
to: 6,
},
}},
)
}

89
node.go Normal file
View File

@ -0,0 +1,89 @@
package parse
import "fmt"
type Node struct {
Name string
Nodes []*Node
commitType CommitType
from, to int
tokens []rune
}
func newNode(name string, ct CommitType, from, to int) *Node {
return &Node{
Name: name,
commitType: ct,
from: from,
to: to,
}
}
func (n *Node) tokenLength() int {
return n.to - n.from
}
func (n *Node) nodeLength() int {
return len(n.Nodes)
}
func findNode(in, n *Node) {
if n == in {
panic(fmt.Errorf("found self in %s", in.Name))
}
for _, ni := range n.Nodes {
findNode(in, ni)
}
}
func (n *Node) append(p *Node) {
findNode(n, p)
n.Nodes = append(n.Nodes, p)
// TODO: check rather if n.from <= p.from??? or panic if less? or check rather node length and commit
// happens in the end anyway?
if n.from == 0 && n.to == 0 {
n.from = p.from
}
n.to = p.to
}
func (n *Node) clear() {
n.from = 0
n.to = 0
n.Nodes = nil
}
func (n *Node) applyTokens(t []rune) {
n.tokens = t
for _, ni := range n.Nodes {
ni.applyTokens(t)
}
}
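// commit flattens alias nodes by lifting their children into the parent.
// For example (sketch): if B is an alias matched inside A, committing A{B{x, y}}
// yields A{x, y}.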
func (n *Node) commit() {
var nodes []*Node
for _, ni := range n.Nodes {
ni.commit()
if ni.commitType&Alias != 0 {
nodes = append(nodes, ni.Nodes...)
} else {
nodes = append(nodes, ni)
}
}
n.Nodes = nodes
}
func (n *Node) String() string {
if n.from >= len(n.tokens) || n.to > len(n.tokens) {
return n.Name + ":incomplete"
}
return fmt.Sprintf("%s:%d:%d:%s", n.Name, n.from, n.to, n.Text())
}
func (n *Node) Text() string {
return string(n.tokens[n.from:n.to])
}

69
parse.go Normal file
View File

@ -0,0 +1,69 @@
package parse
import (
"errors"
"fmt"
)
type definition interface {
nodeName() string
parser(*registry, []string) (parser, error)
commitType() CommitType
}
type parser interface {
nodeName() string
setIncludedBy(parser, []string)
cacheIncluded(*context, *Node)
parse(Trace, *context)
}
var errCannotIncludeParsers = errors.New("cannot include parsers")
func parserNotFound(name string) error {
return fmt.Errorf("parser not found: %s", name)
}
func stringsContain(ss []string, s string) bool {
for _, si := range ss {
if si == s {
return true
}
}
return false
}
func copyIncludes(to, from map[string]CommitType) {
if from == nil {
return
}
for name, ct := range from {
to[name] = ct
}
}
func mergeIncludes(left, right map[string]CommitType) map[string]CommitType {
m := make(map[string]CommitType)
copyIncludes(m, left)
copyIncludes(m, right)
return m
}
func parse(t Trace, p parser, c *context) (*Node, error) {
p.parse(t, c)
if c.readErr != nil {
return nil, c.readErr
}
if !c.match {
return nil, ErrInvalidInput
}
if err := c.finalize(); err != nil {
return nil, err
}
return c.node, nil
}

172
quantifier.go Normal file
View File

@ -0,0 +1,172 @@
package parse
type quantifierDefinition struct {
name string
commit CommitType
min, max int
item string
}
type quantifierParser struct {
name string
commit CommitType
min, max int
item parser
includedBy []parser
}
func newQuantifier(name string, ct CommitType, item string, min, max int) *quantifierDefinition {
return &quantifierDefinition{
name: name,
commit: ct,
min: min,
max: max,
item: item,
}
}
func (d *quantifierDefinition) nodeName() string { return d.name }
func (d *quantifierDefinition) parser(r *registry, path []string) (parser, error) {
if stringsContain(path, d.name) {
panic(errCannotIncludeParsers)
}
p, ok := r.parser(d.name)
if ok {
return p, nil
}
qp := &quantifierParser{
name: d.name,
commit: d.commit,
min: d.min,
max: d.max,
}
r.setParser(qp)
item, ok := r.parser(d.item)
if !ok {
itemDefinition, ok := r.definition(d.item)
if !ok {
return nil, parserNotFound(d.item)
}
var err error
item, err = itemDefinition.parser(r, path)
if err != nil {
return nil, err
}
}
qp.item = item
return qp, nil
}
func (d *quantifierDefinition) commitType() CommitType { return d.commit }
func (p *quantifierParser) nodeName() string { return p.name }
// TODO: merge the quantifier into the sequence
// DOC: sequences are greedy and are not revisited, so a*a cannot match anything.
// DOC: how to match a trailing a? (..)*a | .(..)*a
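// For example (sketch of the documented behaviour):
//   A = "a"* "a"   // never matches: "a"* consumes every 'a' and is not revisited
//   A = "a" "a"*   // matches one or more 'a' by putting the mandatory item first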
func (p *quantifierParser) setIncludedBy(i parser, path []string) {
if stringsContain(path, p.name) {
panic(errCannotIncludeParsers)
}
p.includedBy = append(p.includedBy, i)
}
func (p *quantifierParser) cacheIncluded(*context, *Node) {
panic(errCannotIncludeParsers)
}
func (p *quantifierParser) parse(t Trace, c *context) {
t = t.Extend(p.name)
t.Out1("parsing quantifier", c.offset)
if p.commit&Documentation != 0 {
t.Out1("fail, doc")
c.fail(c.offset)
return
}
if c.excluded(c.offset, p.name) {
t.Out1("excluded")
c.fail(c.offset)
return
}
c.exclude(c.offset, p.name)
defer c.include(c.offset, p.name)
node := newNode(p.name, p.commit, c.offset, c.offset)
// this way of checking the cache definitely needs testing against the Russ Cox form
for {
if p.max >= 0 && node.nodeLength() == p.max {
t.Out1("success, max reached")
c.cache.set(node.from, p.name, node)
for _, i := range p.includedBy {
i.cacheIncluded(c, node)
}
c.success(node)
return
}
t.Out2("next quantifier item")
// n, m, ok := c.cache.get(c.offset, p.item.nodeName())
m, ok := c.fromCache(p.item.nodeName())
if ok {
t.Out1("quantifier item found in cache, match:", m, c.offset, c.node.tokenLength())
if m {
node.append(c.node)
if c.node.tokenLength() > 0 {
t.Out2("taking next after cached found")
continue
}
}
if node.nodeLength() >= p.min {
t.Out1("success, no more match")
c.cache.set(node.from, p.name, node)
for _, i := range p.includedBy {
i.cacheIncluded(c, node)
}
c.success(node)
} else {
t.Out1("fail, min not reached")
c.cache.set(node.from, p.name, nil)
c.fail(node.from)
}
return
}
p.item.parse(t, c)
if !c.match || c.node.tokenLength() == 0 {
if node.nodeLength() >= p.min {
t.Out1("success, no more match")
c.cache.set(node.from, p.name, node)
for _, i := range p.includedBy {
i.cacheIncluded(c, node)
}
c.success(node)
} else {
t.Out1("fail, min not reached")
c.cache.set(node.from, p.name, nil)
c.fail(node.from)
}
return
}
node.append(c.node)
}
}

36
registry.go Normal file
View File

@ -0,0 +1,36 @@
package parse
type registry struct {
definitions map[string]definition
parsers map[string]parser
}
func newRegistry() *registry {
return &registry{
definitions: make(map[string]definition),
parsers: make(map[string]parser),
}
}
func (r *registry) definition(name string) (definition, bool) {
d, ok := r.definitions[name]
return d, ok
}
func (r *registry) parser(name string) (parser, bool) {
p, ok := r.parsers[name]
return p, ok
}
func (r *registry) setDefinition(d definition) error {
if _, ok := r.definitions[d.nodeName()]; ok {
return duplicateDefinition(d.nodeName())
}
r.definitions[d.nodeName()] = d
return nil
}
func (r *registry) setParser(p parser) {
r.parsers[p.nodeName()] = p
}

14
scheme.p Normal file
View File

@ -0,0 +1,14 @@
// TODO: comment
ws:alias = [ \b\f\n\r\t\v];
comment:alias = ";" [^\n]*;
wsc:alias = ws | comment;
number = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? [0-9]+)?;
string = "\"" ([^\\"] | "\\" .)* "\"";
symbol = ([^\\ \n\t\b\f\r\v\"()\[\]#] | "\\" .)+;
list-form:alias = "(" wsc* (expression wsc*)* ")"
| "[" wsc* (expression wsc*)* "]";
list = list-form;
vector = "#" list-form;
expression:alias = number | string | symbol | list;
scheme = wsc* (expression wsc*)*;

84
scheme_test.go Normal file
View File

@ -0,0 +1,84 @@
package parse
import "testing"
func TestScheme(t *testing.T) {
test(t, "scheme.p", "scheme", []testItem{{
msg: "empty",
}, {
msg: "a function",
text: `
(define (foo a b c)
(let ([bar (+ a b c)]
[baz (- a b c)])
(* bar baz)))
`,
nodes: []*Node{{
Name: "list",
Nodes: []*Node{{
Name: "symbol",
}, {
Name: "list",
Nodes: []*Node{{
Name: "symbol",
}, {
Name: "symbol",
}, {
Name: "symbol",
}, {
Name: "symbol",
}},
}, {
Name: "list",
Nodes: []*Node{{
Name: "symbol",
}, {
Name: "list",
Nodes: []*Node{{
Name: "list",
Nodes: []*Node{{
Name: "symbol",
}, {
Name: "list",
Nodes: []*Node{{
Name: "symbol",
}, {
Name: "symbol",
}, {
Name: "symbol",
}, {
Name: "symbol",
}},
}},
}, {
Name: "list",
Nodes: []*Node{{
Name: "symbol",
}, {
Name: "list",
Nodes: []*Node{{
Name: "symbol",
}, {
Name: "symbol",
}, {
Name: "symbol",
}, {
Name: "symbol",
}},
}},
}},
}, {
Name: "list",
Nodes: []*Node{{
Name: "symbol",
}, {
Name: "symbol",
}, {
Name: "symbol",
}},
}},
}},
}},
ignorePosition: true,
}})
}

187
sequence.go Normal file
View File

@ -0,0 +1,187 @@
package parse
type sequenceDefinition struct {
name string
commit CommitType
items []string
}
type sequenceParser struct {
name string
commit CommitType
items []parser
including []parser
}
func newSequence(name string, ct CommitType, items []string) *sequenceDefinition {
return &sequenceDefinition{
name: name,
commit: ct,
items: items,
}
}
func (d *sequenceDefinition) nodeName() string { return d.name }
func (d *sequenceDefinition) parser(r *registry, path []string) (parser, error) {
if stringsContain(path, d.name) {
panic(errCannotIncludeParsers)
}
p, ok := r.parser(d.name)
if ok {
return p, nil
}
sp := &sequenceParser{
name: d.name,
commit: d.commit,
}
r.setParser(sp)
var items []parser
path = append(path, d.name)
for _, name := range d.items {
item, ok := r.parser(name)
if ok {
items = append(items, item)
continue
}
itemDefinition, ok := r.definition(name)
if !ok {
return nil, parserNotFound(name)
}
item, err := itemDefinition.parser(r, path)
if err != nil {
return nil, err
}
items = append(items, item)
}
// for single items, acts like a choice
if len(items) == 1 {
items[0].setIncludedBy(sp, path)
}
sp.items = items
return sp, nil
}
func (d *sequenceDefinition) commitType() CommitType {
return d.commit
}
func (p *sequenceParser) nodeName() string { return p.name }
func (p *sequenceParser) setIncludedBy(i parser, path []string) {
if stringsContain(path, p.name) {
return
}
p.including = append(p.including, i)
}
func (p *sequenceParser) cacheIncluded(c *context, n *Node) {
if !c.excluded(n.from, p.name) {
return
}
nc := newNode(p.name, p.commit, n.from, n.to)
nc.append(n)
c.cache.set(nc.from, p.name, nc)
// maybe it is enough to cache only those that are on the path
for _, i := range p.including {
i.cacheIncluded(c, nc)
}
}
/*
should be possible to parse:
a = "0"
b = "1"
c = a* e b
d = a | c
e = b | d
input: 111
*/
func (p *sequenceParser) parse(t Trace, c *context) {
t = t.Extend(p.name)
t.Out1("parsing sequence", c.offset)
if p.commit&Documentation != 0 {
t.Out1("fail, doc")
c.fail(c.offset)
return
}
// TODO: maybe we can check the cache here? no because that would exclude the continuations
if c.excluded(c.offset, p.name) {
t.Out1("excluded")
c.fail(c.offset)
return
}
c.exclude(c.offset, p.name)
defer c.include(c.offset, p.name)
items := p.items
node := newNode(p.name, p.commit, c.offset, c.offset)
for len(items) > 0 {
t.Out2("next sequence item")
// n, m, ok := c.cache.get(c.offset, items[0].nodeName())
m, ok := c.fromCache(items[0].nodeName())
if ok {
t.Out1("sequence item found in cache, match:", m, items[0].nodeName(), c.offset)
if m {
t.Out2("sequence item from cache:", c.node.Name, len(c.node.Nodes), c.node.from)
node.append(c.node)
items = items[1:]
continue
}
c.cache.set(node.from, p.name, nil)
c.fail(node.from)
return
}
items[0].parse(t, c)
items = items[1:]
if !c.match {
t.Out1("fail, item failed")
c.cache.set(node.from, p.name, nil)
c.fail(node.from)
return
}
if c.node.tokenLength() > 0 {
t.Out2("appending sequence item", c.node.Name, len(c.node.Nodes))
node.append(c.node)
}
}
t.Out1("success, items parsed")
t.Out2("nodes", node.nodeLength())
if node.Name == "group" {
t.Out2("caching group", node.from, node.Nodes[2].Name, node.Nodes[2].nodeLength())
}
// is this cached item ever taken?
c.cache.set(node.from, p.name, node)
for _, i := range p.including {
i.cacheIncluded(c, node)
}
t.Out2("caching sequence and included by done")
c.success(node)
}

9
sexpr.p Normal file
View File

@ -0,0 +1,9 @@
ws:alias = [ \b\f\n\r\t\v];
comment:alias = ";" [^\n]*;
wsc:alias = ws | comment;
number = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? [0-9]+)?;
string = "\"" ([^\\"] | "\\" .)* "\"";
symbol = ([^\\ \n\t\b\f\r\v\"()] | "\\" .)+;
list = "(" wsc* (expression wsc*)* ")";
expression:alias = number | string | symbol | list;
s-expression = expression;

71
sexpr_test.go Normal file
View File

@ -0,0 +1,71 @@
package parse
import "testing"
func TestSExpr(t *testing.T) {
test(t, "sexpr.p", "s-expression", []testItem{{
msg: "number",
text: "42",
nodes: []*Node{{
Name: "number",
}},
ignorePosition: true,
}, {
msg: "string",
text: "\"foo\"",
nodes: []*Node{{
Name: "string",
}},
ignorePosition: true,
}, {
msg: "symbol",
text: "foo",
nodes: []*Node{{
Name: "symbol",
}},
ignorePosition: true,
}, {
msg: "nil",
text: "()",
nodes: []*Node{{
Name: "list",
}},
ignorePosition: true,
}, {
msg: "list",
text: "(foo bar baz)",
nodes: []*Node{{
Name: "list",
Nodes: []*Node{{
Name: "symbol",
}, {
Name: "symbol",
}, {
Name: "symbol",
}},
}},
ignorePosition: true,
}, {
msg: "embedded list",
text: "(foo (bar (baz)) qux)",
nodes: []*Node{{
Name: "list",
Nodes: []*Node{{
Name: "symbol",
}, {
Name: "list",
Nodes: []*Node{{
Name: "symbol",
}, {
Name: "list",
Nodes: []*Node{{
Name: "symbol",
}},
}},
}, {
Name: "symbol",
}},
}},
ignorePosition: true,
}})
}

158
syntax.go Normal file
View File

@ -0,0 +1,158 @@
package parse
import (
"bufio"
"errors"
"fmt"
"io"
)
type CommitType int
const (
None CommitType = 0
Alias CommitType = 1 << iota
Documentation
Root
)
type Syntax struct {
trace Trace
registry *registry
initialized bool
initFailed bool
rootSet bool
root definition
parser parser
}
var (
ErrSyntaxInitialized = errors.New("syntax initialized")
ErrInitFailed = errors.New("init failed")
ErrNoParsersDefined = errors.New("no parsers defined")
ErrInvalidInput = errors.New("invalid input")
ErrInvalidCharacter = errors.New("invalid character") // two use cases: utf8 and boot
ErrUnexpectedCharacter = errors.New("unexpected character")
ErrInvalidSyntax = errors.New("invalid syntax")
ErrRootAlias = errors.New("root node cannot be an alias")
)
func duplicateDefinition(name string) error {
return fmt.Errorf("duplicate definition: %s", name)
}
func NewSyntax(t Trace) *Syntax {
if t == nil {
t = NewTrace(0)
}
return &Syntax{
trace: t,
registry: newRegistry(),
}
}
func (s *Syntax) register(d definition) error {
if s.initialized {
return ErrSyntaxInitialized
}
if d.commitType()&Root != 0 {
s.root = d
s.rootSet = true
} else if !s.rootSet {
s.root = d
}
return s.registry.setDefinition(d)
}
func (s *Syntax) AnyChar(name string, ct CommitType) error {
return s.register(newChar(name, ct, true, false, nil, nil))
}
func (s *Syntax) Class(name string, ct CommitType, not bool, chars []rune, ranges [][]rune) error {
return s.register(newChar(name, ct, false, not, chars, ranges))
}
func childName(name string, childIndex int) string {
return fmt.Sprintf("%s:%d", name, childIndex)
}
func (s *Syntax) CharSequence(name string, ct CommitType, chars []rune) error {
var refs []string
for i, ci := range chars {
ref := childName(name, i)
refs = append(refs, ref)
if err := s.register(newChar(ref, Alias, false, false, []rune{ci}, nil)); err != nil {
return err
}
}
return s.Sequence(name, ct, refs...)
}
func (s *Syntax) Quantifier(name string, ct CommitType, item string, min, max int) error {
return s.register(newQuantifier(name, ct, item, min, max))
}
func (s *Syntax) Sequence(name string, ct CommitType, items ...string) error {
return s.register(newSequence(name, ct, items))
}
func (s *Syntax) Choice(name string, ct CommitType, elements ...string) error {
return s.register(newChoice(name, ct, elements))
}
func (s *Syntax) Read(r io.Reader) error {
if s.initialized {
return ErrSyntaxInitialized
}
return nil
}
func (s *Syntax) Init() error {
if s.initFailed {
return ErrInitFailed
}
if s.initialized {
return nil
}
if s.root == nil {
return ErrNoParsersDefined
}
if s.root.commitType()&Alias != 0 {
return ErrRootAlias
}
var err error
s.parser, err = s.root.parser(s.registry, nil)
if err != nil {
s.initFailed = true
return err
}
s.initialized = true
return nil
}
func (s *Syntax) Generate(w io.Writer) error {
if err := s.Init(); err != nil {
return err
}
return nil
}
func (s *Syntax) Parse(r io.Reader) (*Node, error) {
if err := s.Init(); err != nil {
return nil, err
}
c := newContext(bufio.NewReader(r))
return parse(s.trace, s.parser, c)
}
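// Usage sketch (a negative max is treated as unbounded by the quantifier parser):
//
//	s := NewSyntax(NewTrace(0))
//	_ = s.CharSequence("a", None, []rune("a"))
//	_ = s.Quantifier("as", Root, "a", 1, -1)
//	node, err := s.Parse(strings.NewReader("aaa"))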

78
syntax.p Normal file
View File

@ -0,0 +1,78 @@
ws:alias = " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v";
wsc:alias = ws | comment;
block-comment:alias = "/*" ("*" [^/] | [^*])* "*/";
line-comment:alias = "//" [^\n]*;
comment-segment:alias = line-comment | block-comment;
ws-no-nl:alias = " " | "\t" | "\b" | "\f" | "\r" | "\v";
comment = comment-segment (ws-no-nl* "\n"? ws-no-nl* comment-segment)*;
any-char = "."; // equivalent to [^]
// TODO: document matching terminal: []
// TODO: handle char class equivalences
// TODO: enable streaming
// TODO: set route function in generated code?
// caution: newline is accepted
class-not = "^";
class-char = [^\\\[\]\^\-] | "\\" .;
char-range = class-char "-" class-char;
char-class = "[" class-not? (class-char | char-range)* "]";
// caution: newline is accepted
sequence-char = [^\\"] | "\\" .;
char-sequence = "\"" sequence-char* "\"";
// TODO: this can be mixed up with a sequence. Is that fine? Fix this; see the mml symbol rule.
terminal:alias = any-char | char-class | char-sequence;
symbol = [^\\ \n\t\b\f\r\v/.\[\]\"{}\^+*?|():=;]+;
group:alias = "(" wsc* expression wsc* ")";
number:alias = [0-9]+;
count = number;
count-quantifier = "{" wsc* count wsc* "}";
range-from = number;
range-to = number;
range-quantifier = "{" wsc* range-from? wsc* "," wsc* range-to? wsc* "}";
one-or-more = "+";
zero-or-more = "*";
zero-or-one = "?";
quantity:alias = count-quantifier
| range-quantifier
| one-or-more
| zero-or-more
| zero-or-one;
quantifier = (terminal | symbol | group) wsc* quantity;
item:alias = terminal | symbol | group | quantifier;
sequence = item (wsc* item)+;
element:alias = terminal | symbol | group | quantifier | sequence;
// DOC: once cached, doesn't try again, even in a new context, therefore the order may matter
choice = element (wsc* "|" wsc* element)+;
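// illustration (assumption based on the note above): with A = "a" | "ab", a successful
// "a" match is cached for its offset and the "ab" alternative may not be retried there,
// so listing the longer alternative first, A = "ab" | "a", can be necessary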
// DOC: not having 'not' needs some tricks sometimes
expression:alias = terminal
| symbol
| group
| quantifier
| sequence
| choice;
alias = "alias";
doc = "doc";
root = "root";
flag:alias = alias | doc | root;
definition = symbol (":" flag)* wsc* "=" wsc* expression;
definitions:alias = definition (wsc* ";" (wsc | ";")* definition)*;
syntax:root = (wsc | ";")* definitions? (wsc | ";")*;

72
trace.go Normal file
View File

@ -0,0 +1,72 @@
package parse
import (
"fmt"
"os"
)
type Trace interface {
Out(...interface{})
Out1(...interface{})
Out2(...interface{})
Out3(...interface{})
Extend(string) Trace
}
type DefaultTrace struct {
level int
path string
}
type NopTrace struct{}
func NewTrace(level int) *DefaultTrace {
return &DefaultTrace{
level: level,
path: "/",
}
}
func (t *DefaultTrace) printlnLevel(l int, a ...interface{}) {
if l > t.level {
return
}
fmt.Fprintln(os.Stderr, append([]interface{}{t.path}, a...)...)
}
func (t *DefaultTrace) Out(a ...interface{}) {
t.printlnLevel(0, a...)
}
func (t *DefaultTrace) Out1(a ...interface{}) {
t.printlnLevel(1, a...)
}
func (t *DefaultTrace) Out2(a ...interface{}) {
t.printlnLevel(2, a...)
}
func (t *DefaultTrace) Out3(a ...interface{}) {
t.printlnLevel(3, a...)
}
func (t *DefaultTrace) Extend(name string) Trace {
var p string
if t.path == "/" {
p = t.path + name
} else {
p = t.path + "/" + name
}
return &DefaultTrace{
level: t.level,
path: p,
}
}
func (NopTrace) Out(...interface{}) {}
func (NopTrace) Out1(...interface{}) {}
func (NopTrace) Out2(...interface{}) {}
func (NopTrace) Out3(...interface{}) {}
func (t NopTrace) Extend(string) Trace { return t }
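// Usage sketch:
//
//	t := NewTrace(2).Extend("choice").Extend("symbol")
//	t.Out2("parsing at offset", 3) // prints: /choice/symbol parsing at offset 3
//	t.Out3("details")              // suppressed: 3 > trace level 2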