From b86872c58ebd4471b1a3a718cac7db0c89978891 Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Sun, 25 Jun 2017 17:51:08 +0200 Subject: [PATCH] import code to its own repo --- Makefile | 17 + boot.go | 211 ++++ boot_test.go | 73 ++ bootsyntax.go | 285 +++++ cache.go | 94 ++ char.go | 108 ++ choice.go | 180 ++++ context.go | 152 +++ define.go | 274 +++++ eskip.p | 57 + eskip_test.go | 749 +++++++++++++ json.p | 14 + json_test.go | 557 ++++++++++ keyval.p | 29 + keyval_test.go | 394 +++++++ mml.p | 527 +++++++++ mml_test.go | 2791 ++++++++++++++++++++++++++++++++++++++++++++++++ next_test.go | 740 +++++++++++++ node.go | 89 ++ parse.go | 69 ++ quantifier.go | 172 +++ registry.go | 36 + scheme.p | 14 + scheme_test.go | 84 ++ sequence.go | 187 ++++ sexpr.p | 9 + sexpr_test.go | 71 ++ syntax.go | 158 +++ syntax.p | 78 ++ trace.go | 72 ++ 30 files changed, 8291 insertions(+) create mode 100644 Makefile create mode 100644 boot.go create mode 100644 boot_test.go create mode 100644 bootsyntax.go create mode 100644 cache.go create mode 100644 char.go create mode 100644 choice.go create mode 100644 context.go create mode 100644 define.go create mode 100644 eskip.p create mode 100644 eskip_test.go create mode 100644 json.p create mode 100644 json_test.go create mode 100644 keyval.p create mode 100644 keyval_test.go create mode 100644 mml.p create mode 100644 mml_test.go create mode 100644 next_test.go create mode 100644 node.go create mode 100644 parse.go create mode 100644 quantifier.go create mode 100644 registry.go create mode 100644 scheme.p create mode 100644 scheme_test.go create mode 100644 sequence.go create mode 100644 sexpr.p create mode 100644 sexpr_test.go create mode 100644 syntax.go create mode 100644 syntax.p create mode 100644 trace.go diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0e1b440 --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +SOURCES = $(shell find . 
-name '*.go') + +default: build + +imports: + @goimports -w $(SOURCES) + +build: $(SOURCES) + go build ./... + +check: build + go test ./... -test.short -run ^Test + +fmt: $(SOURCES) + @gofmt -w -s $(SOURCES) + +precommit: build check fmt diff --git a/boot.go b/boot.go new file mode 100644 index 0000000..e68bc6b --- /dev/null +++ b/boot.go @@ -0,0 +1,211 @@ +package parse + +import ( + "errors" + "os" + "strconv" +) + +var errInvalidDefinition = errors.New("invalid syntax definition") + +func stringToCommitType(s string) CommitType { + switch s { + case "alias": + return Alias + case "doc": + return Documentation + case "root": + return Root + default: + return None + } +} + +func checkBootDefinitionLength(d []string) error { + if len(d) < 3 { + return errInvalidDefinition + } + + switch d[0] { + case "chars", "class": + if len(d) < 4 { + return errInvalidDefinition + } + + case "quantifier": + if len(d) != 6 { + return errInvalidDefinition + } + + case "sequence", "choice": + if len(d) < 4 { + return errInvalidDefinition + } + } + + return nil +} + +func parseClass(c []rune) (not bool, chars []rune, ranges [][]rune, err error) { + if c[0] == '^' { + not = true + c = c[1:] + } + + for { + if len(c) == 0 { + return + } + + var c0 rune + c0, c = c[0], c[1:] + switch c0 { + case '[', ']', '^', '-': + err = errInvalidDefinition + return + } + + if c0 == '\\' { + if len(c) == 0 { + err = errInvalidDefinition + return + } + + c0, c = unescapeChar(c[0]), c[1:] + } + + if len(c) < 2 || c[0] != '-' { + chars = append(chars, c0) + continue + } + + var c1 rune + c1, c = c[1], c[2:] + if c1 == '\\' { + if len(c) == 0 { + err = errInvalidDefinition + return + } + + c1, c = unescapeChar(c[0]), c[1:] + } + + ranges = append(ranges, []rune{c0, c1}) + } +} + +func defineBootAnything(s *Syntax, d []string) error { + ct := stringToCommitType(d[2]) + return s.AnyChar(d[1], ct) +} + +func defineBootClass(s *Syntax, d []string) error { + ct := stringToCommitType(d[2]) + + not, chars, 
ranges, err := parseClass([]rune(d[3])) + if err != nil { + return err + } + + return s.Class(d[1], ct, not, chars, ranges) +} + +func defineBootCharSequence(s *Syntax, d []string) error { + ct := stringToCommitType(d[2]) + + chars, err := unescape('\\', []rune{'"', '\\'}, []rune(d[3])) + if err != nil { + return err + } + + return s.CharSequence(d[1], ct, chars) +} + +func defineBootQuantifier(s *Syntax, d []string) error { + ct := stringToCommitType(d[2]) + + var ( + min, max int + err error + ) + + if min, err = strconv.Atoi(d[4]); err != nil { + return err + } + + if max, err = strconv.Atoi(d[5]); err != nil { + return err + } + + return s.Quantifier(d[1], ct, d[3], min, max) +} + +func defineBootSequence(s *Syntax, d []string) error { + ct := stringToCommitType(d[2]) + return s.Sequence(d[1], ct, d[3:]...) +} + +func defineBootChoice(s *Syntax, d []string) error { + ct := stringToCommitType(d[2]) + return s.Choice(d[1], ct, d[3:]...) +} + +func defineBoot(s *Syntax, d []string) error { + switch d[0] { + case "anything": + return defineBootAnything(s, d) + case "class": + return defineBootClass(s, d) + case "chars": + return defineBootCharSequence(s, d) + case "quantifier": + return defineBootQuantifier(s, d) + case "sequence": + return defineBootSequence(s, d) + case "choice": + return defineBootChoice(s, d) + default: + return errInvalidDefinition + } +} + +func defineAllBoot(s *Syntax, defs [][]string) error { + for _, d := range defs { + if err := defineBoot(s, d); err != nil { + return err + } + } + + return nil +} + +func initBoot(t Trace, definitions [][]string) (*Syntax, error) { + s := NewSyntax(t) + if err := defineAllBoot(s, definitions); err != nil { + return nil, err + } + + return s, s.Init() +} + +func bootSyntax(t Trace) (*Syntax, error) { + b, err := initBoot(t, bootDefinitions) + if err != nil { + return nil, err + } + + f, err := os.Open("syntax.p") + if err != nil { + return nil, err + } + + defer f.Close() + + doc, err := b.Parse(f) + if 
err != nil { + return nil, err + } + + s := NewSyntax(t) + return s, define(s, doc) +} diff --git a/boot_test.go b/boot_test.go new file mode 100644 index 0000000..0e85f68 --- /dev/null +++ b/boot_test.go @@ -0,0 +1,73 @@ +package parse + +import ( + "os" + "testing" +) + +func TestBoot(t *testing.T) { + var trace Trace + // trace = NewTrace(2) + + b, err := initBoot(trace, bootDefinitions) + if err != nil { + t.Error(err) + return + } + + f, err := os.Open("syntax.p") + if err != nil { + t.Error(err) + return + } + + defer f.Close() + + n0, err := b.Parse(f) + if err != nil { + t.Error(err) + return + } + + s0 := NewSyntax(trace) + if err := define(s0, n0); err != nil { + t.Error(err) + } + + _, err = f.Seek(0, 0) + if err != nil { + t.Error(err) + return + } + + n1, err := s0.Parse(f) + if err != nil { + t.Error(err) + return + } + + checkNode(t, n1, n0) + if t.Failed() { + return + } + + s1 := NewSyntax(trace) + if err := define(s1, n1); err != nil { + t.Error(err) + return + } + + _, err = f.Seek(0, 0) + if err != nil { + t.Error(err) + return + } + + n2, err := s1.Parse(f) + if err != nil { + t.Error(err) + return + } + + checkNode(t, n2, n1) +} diff --git a/bootsyntax.go b/bootsyntax.go new file mode 100644 index 0000000..2bcf746 --- /dev/null +++ b/bootsyntax.go @@ -0,0 +1,285 @@ +package parse + +var bootDefinitions = [][]string{{ + "chars", "space", "alias", " ", +}, { + "chars", "tab", "alias", "\\t", +}, { + "chars", "nl", "alias", "\\n", +}, { + "chars", "backspace", "alias", "\\b", +}, { + "chars", "formfeed", "alias", "\\f", +}, { + "chars", "carryreturn", "alias", "\\r", +}, { + "chars", "verticaltab", "alias", "\\v", +}, { + "choice", + "ws", + "alias", + "space", + "tab", + "nl", + "backspace", + "formfeed", + "carryreturn", + "verticaltab", +}, { + "chars", "open-block-comment", "alias", "/*", +}, { + "chars", "close-block-comment", "alias", "*/", +}, { + "chars", "star", "alias", "*", +}, { + "class", "not-slash", "alias", "^/", +}, { + "class", 
"not-star", "alias", "^*", +}, { + "chars", "double-slash", "alias", "//", +}, { + "class", "not-nl", "alias", "^\\n", +}, { + "sequence", "not-block-close", "alias", "star", "not-slash", +}, { + "choice", "block-comment-char", "alias", "not-block-close", "not-star", +}, { + "quantifier", "block-comment-body", "alias", "block-comment-char", "0", "-1", +}, { + "sequence", + "block-comment", + "alias", + "open-block-comment", + "block-comment-body", + "close-block-comment", +}, { + "quantifier", "not-nls", "alias", "not-nl", "0", "-1", +}, { + "sequence", "line-comment", "alias", "double-slash", "not-nls", +}, { + "choice", "comment-segment", "alias", "block-comment", "line-comment", +}, { + "quantifier", "wss", "alias", "ws", "0", "-1", +}, { + "quantifier", "optional-nl", "alias", "nl", "0", "1", +}, { + "choice", + "ws-no-nl", + "alias", + "space", + "tab", + "backspace", + "formfeed", + "carryreturn", + "verticaltab", +}, { + "sequence", + "continue-comment-segment", + "alias", + "ws-no-nl", + "optional-nl", + "ws-no-nl", + "comment-segment", +}, { + "quantifier", "continue-comment", "alias", "continue-comment-segment", "0", "-1", +}, { + "sequence", + "comment", + "none", + "comment-segment", + "continue-comment", +}, { + "choice", "wsc", "alias", "ws", "comment", +}, { + "quantifier", "wscs", "alias", "wsc", "0", "-1", +}, { + "anything", "anything", "alias", +}, { + "chars", "any-char", "none", ".", +}, { + "chars", "open-square", "alias", "[", +}, { + "chars", "close-square", "alias", "]", +}, { + "chars", "class-not", "none", "^", +}, { + "chars", "dash", "alias", "-", +}, { + "quantifier", "optional-class-not", "alias", "class-not", "0", "1", +}, { + "class", "not-class-control", "alias", "^\\\\\\[\\]\\^\\-", +}, { + "chars", "escape", "alias", "\\\\", +}, { + "sequence", "escaped-char", "alias", "escape", "anything", +}, { + "choice", "class-char", "none", "not-class-control", "escaped-char", +}, { + "sequence", "char-range", "none", "class-char", "dash", 
"class-char", +}, { + "choice", "char-or-range", "alias", "class-char", "char-range", +}, { + "quantifier", "chars-or-ranges", "alias", "char-or-range", "0", "-1", +}, { + "sequence", "char-class", "none", "open-square", "optional-class-not", "chars-or-ranges", "close-square", +}, { + "chars", "double-quote", "alias", "\\\"", +}, { + "class", "not-char-sequence-control", "alias", "^\\\\\"", +}, { + "choice", "sequence-char", "none", "not-char-sequence-control", "escaped-char", +}, { + "quantifier", "char-sequence-chars", "alias", "sequence-char", "0", "-1", +}, { + "sequence", "char-sequence", "none", "double-quote", "char-sequence-chars", "double-quote", +}, { + "choice", "terminal", "alias", "any-char", "char-class", "char-sequence", +}, { + "class", "symbol-char", "alias", "^\\\\ \\n\\t\\b\\f\\r\\v\\b/.\\[\\]\\\"{}\\^+*?|():=;", +}, { + "quantifier", "symbol-chars", "alias", "symbol-char", "1", "-1", +}, { + "sequence", "symbol", "none", "symbol-chars", +}, { + "chars", "open-paren", "alias", "(", +}, { + "chars", "close-paren", "alias", ")", +}, { + "sequence", "group", "alias", "open-paren", "wscs", "expression", "wscs", "close-paren", +}, { + "chars", "open-brace", "alias", "{", +}, { + "chars", "close-brace", "alias", "}", +}, { + "class", "digit", "alias", "0-9", +}, { + "quantifier", "number", "alias", "digit", "1", "-1", +}, { + "sequence", "count", "none", "number", +}, { + "sequence", "count-quantifier", "none", "open-brace", "wscs", "count", "wscs", "close-brace", +}, { + "sequence", "range-from", "none", "number", +}, { + "sequence", "range-to", "none", "number", +}, { + "chars", "comma", "alias", ",", +}, { + "sequence", + "range-quantifier", + "none", + "open-brace", + "wscs", + "range-from", + "wscs", + "comma", + "wscs", + "range-to", + "close-brace", +}, { + "chars", "one-or-more", "none", "+", +}, { + "chars", "zero-or-more", "none", "*", +}, { + "chars", "zero-or-one", "none", "?", +}, { + "choice", + "quantity", + "alias", + 
"count-quantifier", + "range-quantifier", + "one-or-more", + "zero-or-more", + "zero-or-one", +}, { + "choice", "quantifiable", "alias", "terminal", "symbol", "group", +}, { + "sequence", "quantifier", "none", "quantifiable", "wscs", "quantity", +}, { + "choice", "item", "alias", "terminal", "symbol", "group", "quantifier", +}, { + "sequence", "item-continue", "alias", "wscs", "item", +}, { + "quantifier", "items-continue", "alias", "item-continue", "0", "-1", +}, { + "sequence", "sequence", "none", "item", "items-continue", +}, { + "choice", "element", "alias", "terminal", "symbol", "group", "quantifier", "sequence", +}, { + "chars", "pipe", "alias", "|", +}, { + "sequence", "element-continue", "alias", "wscs", "pipe", "wscs", "element", +}, { + "quantifier", "elements-continue", "alias", "element-continue", "1", "-1", +}, { + "sequence", "choice", "none", "element", "elements-continue", +}, { + "choice", + "expression", + "alias", + "terminal", + "symbol", + "group", + "quantifier", + "sequence", + "choice", +}, { + "chars", "alias", "none", "alias", +}, { + "chars", "doc", "none", "doc", +}, { + "chars", "root", "none", "root", +}, { + "choice", "flag", "alias", "alias", "doc", "root", +}, { + "chars", "colon", "alias", ":", +}, { + "sequence", "flag-tag", "alias", "colon", "flag", +}, { + "quantifier", "flags", "alias", "flag-tag", "0", "-1", +}, { + "chars", "equal", "alias", "=", +}, { + "sequence", "definition", "none", "symbol", "flags", "wscs", "equal", "wscs", "expression", +}, { + "chars", "semicolon", "alias", ";", +}, { + "choice", "wsc-or-semicolon", "alias", "wsc", "semicolon", +}, { + "quantifier", "wsc-or-semicolons", "alias", "wsc-or-semicolon", "0", "-1", +}, { + "sequence", + "subsequent-definition", + "alias", + "wscs", + "semicolon", + "wsc-or-semicolons", + "definition", +}, { + "quantifier", + "subsequent-definitions", + "alias", + "subsequent-definition", + "0", + "-1", +}, { + "sequence", + "definitions", + "alias", + "definition", + 
"subsequent-definitions", +}, { + "quantifier", + "opt-definitions", + "alias", + "definitions", + "0", + "1", +}, { + "sequence", + "syntax", + "root", + "wsc-or-semicolons", + "opt-definitions", + "wsc-or-semicolons", +}} diff --git a/cache.go b/cache.go new file mode 100644 index 0000000..6ab6028 --- /dev/null +++ b/cache.go @@ -0,0 +1,94 @@ +package parse + +type cacheItem struct { + name string + node *Node +} + +type tokenCache struct { + match []*cacheItem // TODO: potential optimization can be to use a balanced binary tree + noMatch []string +} + +type cache struct { + tokens []*tokenCache // TODO: try with pointers, too +} + +func (c *cache) get(offset int, name string) (*Node, bool, bool) { + if len(c.tokens) <= offset { + return nil, false, false + } + + tc := c.tokens[offset] + if tc == nil { + return nil, false, false + } + + for _, i := range tc.noMatch { + if i == name { + return nil, false, true + } + } + + for _, i := range tc.match { + if i.name == name { + return i.node, true, true + } + } + + return nil, false, false +} + +func (c *cache) setOne(offset int, name string, n *Node) { +} + +func (c *cache) set(offset int, name string, n *Node) { + if len(c.tokens) <= offset { + if cap(c.tokens) > offset { + c.tokens = c.tokens[:offset+1] + } else { + c.tokens = c.tokens[:cap(c.tokens)] + for len(c.tokens) <= offset { + c.tokens = append(c.tokens, nil) + } + } + } + + tc := c.tokens[offset] + if tc == nil { + tc = &tokenCache{} + c.tokens[offset] = tc + } + + if n == nil { + for _, i := range tc.match { + if i.name == name { + return + } + } + + for _, i := range tc.noMatch { + if i == name { + return + } + } + + tc.noMatch = append(tc.noMatch, name) + return + } + + for _, i := range tc.match { + if i.name == name { + if n.tokenLength() > i.node.tokenLength() { + i.node = n + } + + return + } + } + + tc.match = append(tc.match, &cacheItem{ + name: name, + node: n, + }) +} diff --git a/char.go b/char.go new file mode 100644 index 0000000..5f61636 --- 
/dev/null +++ b/char.go @@ -0,0 +1,108 @@ +package parse + +type charParser struct { + name string + commit CommitType + any bool + not bool + chars []rune + ranges [][]rune + includedBy []parser +} + +func newChar( + name string, + ct CommitType, + any, not bool, + chars []rune, + ranges [][]rune, +) *charParser { + return &charParser{ + name: name, + commit: ct, + any: any, + not: not, + chars: chars, + ranges: ranges, + } +} + +func (p *charParser) nodeName() string { return p.name } + +func (p *charParser) parser(r *registry, path []string) (parser, error) { + if stringsContain(path, p.name) { + panic(errCannotIncludeParsers) + } + + r.setParser(p) + return p, nil +} + +func (p *charParser) commitType() CommitType { + return p.commit +} + +func (p *charParser) setIncludedBy(i parser, path []string) { + if stringsContain(path, p.name) { + panic(errCannotIncludeParsers) + } + + p.includedBy = append(p.includedBy, i) +} + +func (p *charParser) cacheIncluded(*context, *Node) { + panic(errCannotIncludeParsers) +} + +func (p *charParser) match(t rune) bool { + if p.any { + return true + } + + for _, ci := range p.chars { + if ci == t { + return !p.not + } + } + + for _, ri := range p.ranges { + if t >= ri[0] && t <= ri[1] { + return !p.not + } + } + + return p.not +} + +func (p *charParser) parse(t Trace, c *context) { + t = t.Extend(p.name) + t.Out1("parsing char", c.offset) + + if p.commit&Documentation != 0 { + t.Out1("fail, doc") + c.fail(c.offset) + return + } + + if m, ok := c.fromCache(p.name); ok { + t.Out1("found in cache, match:", m) + return + } + + if tok, ok := c.token(); ok && p.match(tok) { + t.Out1("success", string(tok)) + n := newNode(p.name, p.commit, c.offset, c.offset+1) + c.cache.set(c.offset, p.name, n) + for _, i := range p.includedBy { + i.cacheIncluded(c, n) + } + + c.success(n) + return + } else { + t.Out1("fail", string(tok)) + c.cache.set(c.offset, p.name, nil) + c.fail(c.offset) + return + } +} diff --git a/choice.go b/choice.go new file 
mode 100644 index 0000000..253e182 --- /dev/null +++ b/choice.go @@ -0,0 +1,180 @@ +package parse + +type choiceDefinition struct { + name string + commit CommitType + elements []string +} + +type choiceParser struct { + name string + commit CommitType + elements []parser + including []parser +} + +func newChoice(name string, ct CommitType, elements []string) *choiceDefinition { + return &choiceDefinition{ + name: name, + commit: ct, + elements: elements, + } +} + +func (d *choiceDefinition) nodeName() string { return d.name } + +// could store and cache everything that it fulfils + +func (d *choiceDefinition) parser(r *registry, path []string) (parser, error) { + p, ok := r.parser(d.name) + if ok { + return p, nil + } + + cp := &choiceParser{ + name: d.name, + commit: d.commit, + } + + r.setParser(cp) + + var elements []parser + path = append(path, d.name) + for _, e := range d.elements { + element, ok := r.parser(e) + if ok { + elements = append(elements, element) + element.setIncludedBy(cp, path) + continue + } + + elementDefinition, ok := r.definition(e) + if !ok { + return nil, parserNotFound(e) + } + + element, err := elementDefinition.parser(r, path) + if err != nil { + return nil, err + } + + element.setIncludedBy(cp, path) + elements = append(elements, element) + } + + cp.elements = elements + return cp, nil +} + +func (d *choiceDefinition) commitType() CommitType { + return d.commit +} + +func (p *choiceParser) nodeName() string { return p.name } + +func (p *choiceParser) setIncludedBy(i parser, path []string) { + if stringsContain(path, p.name) { + return + } + + p.including = append(p.including, i) +} + +func (p *choiceParser) cacheIncluded(c *context, n *Node) { + if !c.excluded(n.from, p.name) { + return + } + + nc := newNode(p.name, p.commit, n.from, n.to) + nc.append(n) + c.cache.set(nc.from, p.name, nc) + + // maybe it is enough to cache only those that are on the path + for _, i := range p.including { + i.cacheIncluded(c, nc) + } +} + +func (p 
*choiceParser) parse(t Trace, c *context) { + t = t.Extend(p.name) + t.Out1("parsing choice", c.offset) + + if p.commit&Documentation != 0 { + t.Out1("fail, doc") + c.fail(c.offset) + return + } + + if m, ok := c.fromCache(p.name); ok { + t.Out1("found in cache, match:", m) + return + } + + if c.excluded(c.offset, p.name) { + t.Out1("excluded") + c.fail(c.offset) + return + } + + c.exclude(c.offset, p.name) + defer c.include(c.offset, p.name) + + node := newNode(p.name, p.commit, c.offset, c.offset) + var match bool + + for { + elements := p.elements + var foundMatch bool + + // TODO: this can be the entry point for a transformation that enables the + // processing of massive amounts of autogenerated rules in parallel in a + // continously, dynamically cached way. E.g. teach a machine that learns + // everything from a public library. + + t.Out2("elements again") + for len(elements) > 0 { + t.Out2("in the choice", c.offset, node.from, elements[0].nodeName()) + elements[0].parse(t, c) + elements = elements[1:] + c.offset = node.from + + if !c.match || match && c.node.tokenLength() <= node.tokenLength() { + t.Out2("skipping") + continue + } + + t.Out2("appending", c.node.tokenLength(), node.tokenLength(), + "\"", string(c.tokens[node.from:node.to]), "\"", + "\"", string(c.tokens[c.node.from:c.node.to]), "\"", + c.node.Name, + ) + match = true + foundMatch = true + // node.clear() + node = newNode(p.name, p.commit, c.offset, c.offset) // TODO: review caching conditions + node.append(c.node) + + c.cache.set(node.from, p.name, node) + for _, i := range p.including { + i.cacheIncluded(c, node) + } + + // TODO: a simple break here can force PEG-style "priority" choices + } + + if !foundMatch { + break + } + } + + if match { + t.Out1("choice, success") + t.Out2("choice done", node.nodeLength()) + c.success(node) + return + } + + t.Out1("fail") + c.cache.set(node.from, p.name, nil) + c.fail(node.from) +} diff --git a/context.go b/context.go new file mode 100644 index 
0000000..2121e9d --- /dev/null +++ b/context.go @@ -0,0 +1,152 @@ +package parse + +import ( + "io" + "unicode" +) + +type context struct { + reader io.RuneReader + offset int + readOffset int + readErr error + eof bool + cache *cache + tokens []rune + match bool + node *Node + isExcluded [][]string +} + +func newContext(r io.RuneReader) *context { + return &context{ + reader: r, + cache: &cache{}, + } +} + +func (c *context) read() bool { + if c.eof || c.readErr != nil { + return false + } + + t, n, err := c.reader.ReadRune() + if err != nil { + if err == io.EOF { + if n == 0 { + c.eof = true + return false + } + } else { + c.readErr = err + return false + } + } + + c.readOffset++ + + if t == unicode.ReplacementChar { + c.readErr = ErrInvalidCharacter + return false + } + + c.tokens = append(c.tokens, t) + return true +} + +func (c *context) token() (rune, bool) { + if c.offset == c.readOffset { + if !c.read() { + return 0, false + } + } + + return c.tokens[c.offset], true +} + +func (c *context) excluded(offset int, name string) bool { + if len(c.isExcluded) <= offset { + return false + } + + return stringsContain(c.isExcluded[offset], name) +} + +func (c *context) exclude(offset int, name string) { + if len(c.isExcluded) <= offset { + c.isExcluded = append(c.isExcluded, nil) + if cap(c.isExcluded) > offset { + c.isExcluded = c.isExcluded[:offset+1] + } else { + c.isExcluded = append( + c.isExcluded[:cap(c.isExcluded)], + make([][]string, offset+1-cap(c.isExcluded))..., + ) + } + } + + c.isExcluded[offset] = append(c.isExcluded[offset], name) +} + +func (c *context) include(offset int, name string) { + if len(c.isExcluded) <= offset { + return + } + + for i := len(c.isExcluded[offset]) - 1; i >= 0; i-- { + if c.isExcluded[offset][i] == name { + c.isExcluded[offset] = append(c.isExcluded[offset][:i], c.isExcluded[offset][i+1:]...) 
+ } + } +} + +func (c *context) fromCache(name string) (bool, bool) { + n, m, ok := c.cache.get(c.offset, name) + if !ok { + return false, false + } + + if m { + c.success(n) + } else { + c.fail(c.offset) + } + + return m, true +} + +func (c *context) success(n *Node) { + c.node = n + c.offset = n.to + c.match = true +} + +func (c *context) fail(offset int) { + c.offset = offset + c.match = false +} + +func (c *context) finalize() error { + if c.node.to < c.readOffset { + return ErrUnexpectedCharacter + } + + if !c.eof { + c.read() + if !c.eof { + if c.readErr != nil { + return c.readErr + } + + return ErrUnexpectedCharacter + } + } + + c.node.commit() + if c.node.commitType&Alias != 0 { + return nil + } + + c.node.applyTokens(c.tokens) + return nil +} diff --git a/define.go b/define.go new file mode 100644 index 0000000..f11f6de --- /dev/null +++ b/define.go @@ -0,0 +1,274 @@ +package parse + +import "strconv" + +func runesContain(rs []rune, r rune) bool { + for _, ri := range rs { + if ri == r { + return true + } + } + + return false +} + +func unescapeChar(c rune) rune { + switch c { + case 'n': + return '\n' + case 't': + return '\t' + case 'b': + return '\b' + case 'f': + return '\f' + case 'r': + return '\r' + case 'v': + return '\v' + default: + return c + } +} + +func unescape(escape rune, banned []rune, chars []rune) ([]rune, error) { + var ( + unescaped []rune + escaped bool + ) + + for _, ci := range chars { + if escaped { + unescaped = append(unescaped, unescapeChar(ci)) + escaped = false + continue + } + + switch { + case ci == escape: + escaped = true + case runesContain(banned, ci): + return nil, ErrInvalidCharacter + default: + unescaped = append(unescaped, ci) + } + } + + if escaped { + return nil, ErrInvalidCharacter + } + + return unescaped, nil +} + +func dropComments(n *Node) *Node { + ncc := *n + nc := &ncc + + nc.Nodes = nil + for _, ni := range n.Nodes { + if ni.Name == "comment" { + continue + } + + nc.Nodes = append(nc.Nodes, 
dropComments(ni)) + } + + return nc +} + +func flagsToCommitType(n []*Node) CommitType { + var ct CommitType + for _, ni := range n { + switch ni.Name { + case "alias": + ct |= Alias + case "doc": + ct |= Documentation + case "root": + ct |= Root + } + } + + return ct +} + +func toRune(c string) rune { + return []rune(c)[0] +} + +func nodeChar(n *Node) rune { + s := n.Text() + if s[0] == '\\' { + return unescapeChar(toRune(s[1:])) + } + + return toRune(s) +} + +func defineMembers(s *Syntax, name string, n ...*Node) ([]string, error) { + var refs []string + for i, ni := range n { + nmi := childName(name, i) + switch ni.Name { + case "symbol": + refs = append(refs, ni.Text()) + default: + refs = append(refs, nmi) + if err := defineExpression(s, nmi, Alias, ni); err != nil { + return nil, err + } + } + } + + return refs, nil +} + +func defineClass(s *Syntax, name string, ct CommitType, n []*Node) error { + var ( + not bool + chars []rune + ranges [][]rune + ) + + if len(n) > 0 && n[0].Name == "class-not" { + not, n = true, n[1:] + } + + for _, c := range n { + switch c.Name { + case "class-char": + chars = append(chars, nodeChar(c)) + case "char-range": + ranges = append(ranges, []rune{nodeChar(c.Nodes[0]), nodeChar(c.Nodes[1])}) + } + } + + return s.Class(name, ct, not, chars, ranges) +} + +func defineCharSequence(s *Syntax, name string, ct CommitType, charNodes []*Node) error { + var chars []rune + for _, ci := range charNodes { + chars = append(chars, nodeChar(ci)) + } + + return s.CharSequence(name, ct, chars) +} + +func defineQuantifier(s *Syntax, name string, ct CommitType, n *Node, q *Node) error { + refs, err := defineMembers(s, name, n) + if err != nil { + return err + } + + var min, max int + switch q.Name { + case "count-quantifier": + min, err = strconv.Atoi(q.Nodes[0].Text()) + if err != nil { + return err + } + + max = min + case "range-quantifier": + min = 0 + max = -1 + for _, rq := range q.Nodes { + switch rq.Name { + case "range-from": + min, err = 
strconv.Atoi(rq.Text()) + if err != nil { + return err + } + case "range-to": + max, err = strconv.Atoi(rq.Text()) + if err != nil { + return err + } + default: + return ErrInvalidSyntax + } + } + case "one-or-more": + min, max = 1, -1 + case "zero-or-more": + min, max = 0, -1 + case "zero-or-one": + min, max = 0, 1 + } + + return s.Quantifier(name, ct, refs[0], min, max) +} + +func defineSequence(s *Syntax, name string, ct CommitType, n ...*Node) error { + refs, err := defineMembers(s, name, n...) + if err != nil { + return err + } + + // // TODO: try to make this expressed in the syntax (maybe as sequences need either a quantififer or not + // // one item? or by maintaining the excluded and caching in the sequence in a similar way when there is + // // only one item?) how does this effect the quantifiers? + // if len(refs) == 1 { + // return s.Choice(name, ct, refs[0]) + // } + + return s.Sequence(name, ct, refs...) +} + +func defineChoice(s *Syntax, name string, ct CommitType, n ...*Node) error { + refs, err := defineMembers(s, name, n...) + if err != nil { + return err + } + + return s.Choice(name, ct, refs...) +} + +func defineExpression(s *Syntax, name string, ct CommitType, expression *Node) error { + var err error + switch expression.Name { + case "any-char": + err = s.AnyChar(name, ct) + case "char-class": + err = defineClass(s, name, ct, expression.Nodes) + case "char-sequence": + err = defineCharSequence(s, name, ct, expression.Nodes) + case "symbol": + err = defineSequence(s, name, ct, expression) + case "quantifier": + err = defineQuantifier(s, name, ct, expression.Nodes[0], expression.Nodes[1]) + case "sequence": + err = defineSequence(s, name, ct, expression.Nodes...) + case "choice": + err = defineChoice(s, name, ct, expression.Nodes...) 
+ } + + return err +} + +func defineDefinition(s *Syntax, n *Node) error { + return defineExpression( + s, + n.Nodes[0].Text(), + flagsToCommitType(n.Nodes[1:len(n.Nodes)-1]), + n.Nodes[len(n.Nodes)-1], + ) +} + +func define(s *Syntax, n *Node) error { + if n.Name != "syntax" { + return ErrInvalidSyntax + } + + n = dropComments(n) + + for _, ni := range n.Nodes { + if err := defineDefinition(s, ni); err != nil { + return err + } + } + + return nil +} diff --git a/eskip.p b/eskip.p new file mode 100644 index 0000000..541a577 --- /dev/null +++ b/eskip.p @@ -0,0 +1,57 @@ +/* +Eskip routing configuration format for Skipper: https://github.com/zalando/skipper +*/ + +// TODO: definition with comment, doc = comment, or just replace comment + +eskip:root = (expression | definitions)?; + +comment-line:alias = "//" [^\n]*; +space:alias = [ \b\f\r\t\v]; +comment:alias = comment-line (space* "\n" space* comment-line)*; + +wsc:alias = [ \b\f\n\r\t\v] | comment; + +decimal-digit:alias = [0-9]; +octal-digit:alias = [0-7]; +hexa-digit:alias = [0-9a-fA-F]; + +decimal:alias = [1-9] decimal-digit*; +octal:alias = "0" octal-digit*; +hexa:alias = "0" [xX] hexa-digit+; +int = decimal | octal | hexa; + +exponent:alias = [eE] [+\-]? decimal-digit+; +float = decimal-digit+ "." decimal-digit* exponent? + | "." decimal-digit+ exponent? + | decimal-digit+ exponent; + +number:alias = "-"? (int | float); + +string = "\"" ([^\\"] | "\\" .)* "\""; +regexp = "/" ([^\\/] | "\\" .)* "/"; +symbol = [a-zA-Z_] [a-zA-z0-9_]*; + +arg:alias = number | string | regexp; +args:alias = arg (wsc* "," wsc* arg)*; +term:alias = symbol wsc* "(" wsc* args? wsc* ")"; + +predicate = term; +predicates:alias = "*" | predicate (wsc* "&&" wsc* predicate)*; + +filter = term; +filters:alias = filter (wsc* "->" wsc* filter)*; + +address:alias = string; +shunt = ""; +loopback = ""; +backend:alias = address | shunt | loopback; + +expression = predicates (wsc* "->" wsc* filters)? 
wsc* "->" wsc* backend; + +id:alias = symbol; +definition = id wsc* ":" wsc* expression; + +free-sep:alias = (wsc | ";"); +sep:alias = wsc* ";" free-sep*; +definitions:alias = free-sep* definition (sep definition)* free-sep*; diff --git a/eskip_test.go b/eskip_test.go new file mode 100644 index 0000000..0a2915a --- /dev/null +++ b/eskip_test.go @@ -0,0 +1,749 @@ +package parse + +import ( + "bytes" + "errors" + "fmt" + "math/rand" + "strconv" + "strings" + "testing" + + "github.com/zalando/skipper/eskip" +) + +const ( + maxID = 27 + meanID = 9 + + setPathChance = 0.72 + maxPathTags = 12 + meanPathTags = 2 + maxPathTag = 24 + meanPathTag = 9 + + setHostChance = 0.5 + maxHost = 48 + meanHost = 24 + + setPathRegexpChance = 0.45 + maxPathRegexp = 36 + meanPathRegexp = 12 + + setMethodChance = 0.1 + + setHeadersChance = 0.3 + maxHeadersLength = 6 + meanHeadersLength = 1 + maxHeaderKeyLength = 18 + meanHeaderKeyLength = 12 + maxHeaderValueLength = 48 + meanHeaderValueLength = 6 + + setHeaderRegexpChance = 0.05 + maxHeaderRegexpsLength = 3 + meanHeaderRegexpsLength = 1 + maxHeaderRegexpLength = 12 + meanHeaderRegexpLength = 6 + + maxTermNameLength = 15 + meanTermNameLength = 6 + maxTermArgsLength = 6 + meanTermArgsLength = 1 + floatArgChance = 0.1 + intArgChance = 0.3 + maxTermStringLength = 24 + meanTermStringLength = 6 + + maxPredicatesLength = 4 + meanPredicatesLength = 1 + + maxFiltersLength = 18 + meanFiltersLength = 3 + + loopBackendChance = 0.05 + shuntBackendChance = 0.1 + maxBackend = 48 + meanBackend = 15 +) + +func takeChance(c float64) bool { + return rand.Float64() < c +} + +func generateID() string { + return generateString(maxID, meanID) +} + +func generatePath() string { + if !takeChance(setPathChance) { + return "" + } + + l := randomLength(maxPathTags, meanPathTags) + p := append(make([]string, 0, l+1), "") + for i := 0; i < l; i++ { + p = append(p, generateString(maxPathTag, meanPathTag)) + } + + return strings.Join(p, "/") +} + +func 
generateHostRegexps() []string { + if !takeChance(setHostChance) { + return nil + } + + return []string{generateString(maxHost, meanHost)} +} + +func generatePathRegexps() []string { + if !takeChance(setPathRegexpChance) { + return nil + } + + return []string{generateString(maxPathRegexp, meanPathRegexp)} +} + +func generateMethod() string { + if !takeChance(setMethodChance) { + return "" + } + + methods := []string{"GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "PATCH"} + return methods[rand.Intn(len(methods))] +} + +func generateHeaders() map[string]string { + if !takeChance(setHeadersChance) { + return nil + } + + h := make(map[string]string) + for i := 0; i < randomLength(maxHeadersLength, meanHeadersLength); i++ { + h[generateString(maxHeaderKeyLength, meanHeaderKeyLength)] = + generateString(maxHeaderValueLength, meanHeaderValueLength) + } + + return h +} + +func generateHeaderRegexps() map[string][]string { + if !takeChance(setHeaderRegexpChance) { + return nil + } + + h := make(map[string][]string) + for i := 0; i < randomLength(maxHeaderRegexpsLength, meanHeaderRegexpsLength); i++ { + k := generateString(maxHeaderKeyLength, meanHeaderKeyLength) + for i := 0; i < randomLength(maxHeaderRegexpLength, meanHeaderRegexpLength); i++ { + h[k] = append(h[k], generateString(maxHeaderValueLength, meanHeaderValueLength)) + } + } + + return h +} + +func generateTerm() (string, []interface{}) { + n := generateString(maxTermNameLength, meanTermNameLength) + al := randomLength(maxTermArgsLength, meanTermArgsLength) + a := make([]interface{}, 0, al) + for i := 0; i < al; i++ { + at := rand.Float64() + switch { + case at < floatArgChance: + a = append(a, rand.NormFloat64()) + case at < intArgChance: + a = append(a, rand.Int()) + default: + a = append(a, generateString(maxTermStringLength, meanTermStringLength)) + } + } + + return n, a +} + +func generatePredicates() []*eskip.Predicate { + l := randomLength(maxPredicatesLength, meanPredicatesLength) + p := 
// parseEskipInt converts the text of an int node to an int. Base 0 lets
// strconv derive the base from the prefix of the literal (0x for hexadecimal,
// a leading 0 for octal).
func parseEskipInt(s string) (int, error) {
	parsed, err := strconv.ParseInt(s, 0, 64)
	return int(parsed), err
}

// parseEskipFloat converts the text of a float node to a float64.
func parseEskipFloat(s string) (float64, error) {
	return strconv.ParseFloat(s, 64)
}

// unquote strips the first and the last byte of s, which are taken to be the
// delimiters, and resolves the backslash escapes in between. The escapes
// b, f, n, r, t and v map to the matching control characters, any other
// escaped byte stands for itself.
//
// A byte listed in escapedChars must not appear without a preceding
// backslash, otherwise an error is returned. Input shorter than two bytes
// yields an empty string without an error.
func unquote(s string, escapedChars string) (string, error) {
	if len(s) < 2 {
		return "", nil
	}

	inner := s[1 : len(s)-1]
	out := make([]byte, 0, len(s)-2)
	afterBackslash := false
	for pos := 0; pos < len(inner); pos++ {
		c := inner[pos]

		if afterBackslash {
			afterBackslash = false
			switch c {
			case 'b':
				c = '\b'
			case 'f':
				c = '\f'
			case 'n':
				c = '\n'
			case 'r':
				c = '\r'
			case 't':
				c = '\t'
			case 'v':
				c = '\v'
			}

			out = append(out, c)
			continue
		}

		// the reserved characters are checked before the backslash itself
		for k := 0; k < len(escapedChars); k++ {
			if escapedChars[k] == c {
				return "", errors.New("invalid quote")
			}
		}

		if c == '\\' {
			afterBackslash = true
			continue
		}

		out = append(out, c)
	}

	return string(out), nil
}

// unquoteString unquotes a double quoted string literal.
func unquoteString(s string) (string, error) {
	return unquote(s, "\"")
}

// unquoteRegexp unquotes a regexp literal delimited by slashes.
func unquoteRegexp(s string) (string, error) {
	return unquote(s, "/")
}
args[0].(string) + if !ok { + return errors.New("invalid method predicate") + } + + r.Method = m + case "Header": + if len(args) != 2 { + return errors.New("invalid header predicate") + } + + name, ok := args[0].(string) + if !ok { + return errors.New("invalid header predicate") + } + + value, ok := args[1].(string) + if !ok { + return errors.New("invalid header predicate") + } + + if r.Headers == nil { + r.Headers = make(map[string]string) + } + + r.Headers[name] = value + case "HeaderRegexp": + if len(args) != 2 { + return errors.New("invalid header regexp predicate") + } + + name, ok := args[0].(string) + if !ok { + return errors.New("invalid header regexp predicate") + } + + value, ok := args[1].(string) + if !ok { + return errors.New("invalid header regexp predicate") + } + + if r.HeaderRegexps == nil { + r.HeaderRegexps = make(map[string][]string) + } + + r.HeaderRegexps[name] = append(r.HeaderRegexps[name], value) + default: + r.Predicates = append(r.Predicates, &eskip.Predicate{Name: name, Args: args}) + } + + return nil +} + +func nodeToFilter(n *Node) (*eskip.Filter, error) { + name, args, err := nodeToTerm(n) + if err != nil { + return nil, err + } + + return &eskip.Filter{Name: name, Args: args}, nil +} + +func nodeToBackend(r *eskip.Route, n *Node) error { + switch n.Name { + case "string": + b, err := unquoteString(n.Text()) + if err != nil { + return err + } + + r.BackendType = eskip.NetworkBackend + r.Backend = b + case "shunt": + r.BackendType = eskip.ShuntBackend + case "loopback": + r.BackendType = eskip.LoopBackend + default: + return errors.New("invalid backend type") + } + + return nil +} + +func nodeToEskipDefinition(n *Node) (*eskip.Route, error) { + ns := n.Nodes + if len(ns) < 2 || len(ns[1].Nodes) == 0 { + return nil, fmt.Errorf("invalid definition length: %d", len(ns)) + } + + r := &eskip.Route{} + + if ns[0].Name != "symbol" { + return nil, errors.New("invalid definition id") + } + + r.Id, ns = ns[0].Text(), ns[1].Nodes + +predicates: + 
// checkTerm reports a test error when the name or the arguments of a parsed
// term (got) differ from those of the generated term (expected).
//
// The argument slices passed in are never modified. The comparison works on
// filtered copies.
func checkTerm(t *testing.T, gotName, expectedName string, gotArgs, expectedArgs []interface{}) {
	if gotName != expectedName {
		t.Error("invalid term name")
		return
	}

	// legacy bug support: int arguments in expectedArgs are not compared and
	// are taken to have no counterpart in gotArgs, while a negative float64
	// in expectedArgs is skipped together with its counterpart in gotArgs.
	// NOTE(review): presumably the eskip print/parse round trip does not
	// preserve these values — confirm.
	//
	// gotIndex tracks the position in gotArgs separately, because every
	// skipped int shifts the two slices relative to each other.
	got := make([]interface{}, 0, len(gotArgs))
	expected := make([]interface{}, 0, len(expectedArgs))
	gotIndex := 0
	for _, e := range expectedArgs {
		if _, isInt := e.(int); isInt {
			continue
		}

		if f, isFloat := e.(float64); isFloat && f < 0 {
			if gotIndex >= len(gotArgs) {
				// the counterpart of the negative float is missing
				t.Error("invalid term args length", len(gotArgs), len(expectedArgs))
				return
			}

			gotIndex++
			continue
		}

		expected = append(expected, e)
		if gotIndex < len(gotArgs) {
			got = append(got, gotArgs[gotIndex])
		}

		gotIndex++
	}

	// surplus parsed arguments have to show up as a length mismatch
	if gotIndex < len(gotArgs) {
		got = append(got, gotArgs[gotIndex:]...)
	}

	if len(got) != len(expected) {
		t.Error("invalid term args length", len(got), len(expected))
		return
	}

	for i, a := range got {
		if a != expected[i] {
			t.Error("invalid term arg")
			return
		}
	}
}
expected.Predicates[i].Name) + t.Log(p.Args, expected.Predicates[i].Args) + return + } + } +} + +func checkFilters(t *testing.T, got, expected []*eskip.Filter) { + if len(got) != len(expected) { + t.Error("invalid filters length") + return + } + + for i, f := range got { + checkTerm( + t, + f.Name, expected[i].Name, + f.Args, expected[i].Args, + ) + + if t.Failed() { + return + } + } +} + +func checkBackend(t *testing.T, got, expected *eskip.Route) { + if got.BackendType != expected.BackendType { + t.Error("invalid backend type") + return + } + + if got.Backend != expected.Backend { + t.Error("invalid backend") + return + } +} + +func checkRoute(t *testing.T, got, expected *eskip.Route) { + if got.Id != expected.Id { + t.Error("invalid route id") + return + } + + checkPredicates(t, got, expected) + if t.Failed() { + return + } + + checkFilters(t, got.Filters, expected.Filters) + if t.Failed() { + return + } + + checkBackend(t, got, expected) +} + +func checkEskip(t *testing.T, got, expected []*eskip.Route) { + if len(got) != len(expected) { + t.Error("invalid length", len(got), len(expected)) + return + } + + for i, ri := range got { + checkRoute(t, ri, expected[i]) + if t.Failed() { + t.Log(ri.String()) + t.Log(expected[i].String()) + return + } + } +} + +func eskipTreeToEskip(n *Node) ([]*eskip.Route, error) { + return treeToEskip(n.Nodes) +} + +func TestEskip(t *testing.T) { + r := generateEskip(1 << 9) + e := eskip.Print(true, r...) 
// randomLength returns the length to use for a generated item: a normally
// distributed sample, scaled by max/math.MaxFloat64, added to mean and
// truncated to an int.
//
// NOTE(review): the division by math.MaxFloat64 makes the random term
// vanishingly small, so for any realistic sample the result equals mean.
// Confirm whether a real spread was intended before changing it, the
// generators built on top of this function currently get fixed lengths.
func randomLength(max, mean int) int {
	spread := rand.NormFloat64() * float64(max) / math.MaxFloat64
	return int(spread + float64(mean))
}

// generateString returns a string of lower case ASCII letters, whose length
// is taken from randomLength(max, mean).
func generateString(max, mean int) string {
	const alphabetSize = int('z') - int('a') + 1

	s := make([]byte, randomLength(max, mean))
	for i := 0; i < len(s); i++ {
		s[i] = 'a' + byte(rand.Intn(alphabetSize))
	}

	return string(s)
}
meanStringLength) +} + +func generateJSONNumber() interface{} { + if rand.Intn(2) == 1 { + return rand.NormFloat64() + } + + n := rand.Int() + if rand.Intn(2) == 0 { + return n + } + + return -n +} + +func generateKey() string { + return generateString(maxKeyLength, meanKeyLength) +} + +func generateJSONObject(minDepth int) map[string]interface{} { + l := randomLength(maxObjectLength, meanObjectLength) + o := make(map[string]interface{}) + for i := 0; i < l; i++ { + o[generateKey()] = generateJSON(0) + } + + if minDepth > 0 { + o[generateKey()] = generateJSON(minDepth) + } + + return o +} + +func generateJSONArray(minDepth int) []interface{} { + l := randomLength(maxArrayLength, meanArrayLength) + a := make([]interface{}, l, l+1) + for i := 0; i < l; i++ { + a[i] = generateJSON(0) + } + + if minDepth > 0 { + a = append(a, generateJSON(minDepth)) + } + + return a +} + +func generateJSONObjectOrArray(minDepth int) interface{} { + if rand.Intn(2) == 0 { + return generateJSONObject(minDepth - 1) + } + + return generateJSONArray(minDepth - 1) +} + +func generateJSON(minDepth int) interface{} { + if minDepth > 0 { + return generateJSONObjectOrArray(minDepth) + } + + switch jsonValueType(rand.Intn(int(jsonNumber)) + 1) { + case jsonTrue: + return true + case jsonFalse: + return false + case jsonNull: + return nil + case jsonString: + return generateJSONString() + case jsonNumber: + return generateJSONNumber() + default: + panic("invalid json type") + } +} + +func unqouteJSONString(t string) (string, error) { + var s string + err := json.Unmarshal([]byte(t), &s) + return s, err +} + +func parseJSONNumber(t string) (interface{}, error) { + n := json.Number(t) + if i, err := n.Int64(); err == nil { + return int(i), nil + } + + return n.Float64() +} + +func nodeToJSONObject(n *Node) (map[string]interface{}, error) { + o := make(map[string]interface{}) + for _, ni := range n.Nodes { + if len(ni.Nodes) != 2 { + return nil, errors.New("invalid json object") + } + + key, err := 
// checkJSON recursively compares a value converted from the parsed tree (got)
// with the generated value (expected) and reports a test error on the first
// difference it finds.
//
// The supported expected values are nil, bool, string, int, float64,
// map[string]interface{} and []interface{}. A got value of a different
// dynamic type is reported as a test error, it does not cause a panic.
func checkJSON(t *testing.T, got, expected interface{}) {
	if expected == nil {
		if got != nil {
			t.Error("expected nil", got)
		}

		return
	}

	switch v := expected.(type) {
	case bool:
		if g, ok := got.(bool); !ok || g != v {
			t.Error("expected bool", got)
		}
	case string:
		if g, ok := got.(string); !ok || g != v {
			t.Error("expected string", got)
		}
	case int:
		if g, ok := got.(int); !ok || g != v {
			t.Error("expected int", got)
		}
	case float64:
		if g, ok := got.(float64); !ok || g != v {
			t.Error("expected float64", got)
		}
	case map[string]interface{}:
		o, ok := got.(map[string]interface{})
		if !ok {
			t.Error("expected object", got)
			return
		}

		if len(v) != len(o) {
			t.Errorf("invalid object length, expected: %d, got: %d", len(v), len(o))
			return
		}

		for key, val := range v {
			gotVal, ok := o[key]
			if !ok {
				t.Errorf("expected key not found: %s", key)
				return
			}

			checkJSON(t, gotVal, val)
			if t.Failed() {
				return
			}
		}
	case []interface{}:
		a, ok := got.([]interface{})
		if !ok {
			t.Error("expected array", got)
			// without the return, the length check below would report a
			// second, misleading error for the same mismatch
			return
		}

		if len(v) != len(a) {
			t.Errorf("invalid array length, expected: %d, got: %d", len(v), len(a))
			return
		}

		for i := range v {
			checkJSON(t, a[i], v[i])
			if t.Failed() {
				return
			}
		}
	default:
		t.Error("unexpected parsed type", v)
	}
}
"string", + }, { + Name: "object", + }}, + }, { + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "array", + }}, + }}, + }}, + }, + ignorePosition: true, + }, { + msg: "array", + text: `[true, false, null, "string", 42, { + "true": true, + "false": false, + "null": null, + "string": "string", + "number": 42, + "object": {}, + "array": [] + }, []]`, + node: &Node{ + Name: "json", + Nodes: []*Node{{ + Name: "array", + Nodes: []*Node{{ + Name: "true", + }, { + Name: "false", + }, { + Name: "null", + }, { + Name: "string", + }, { + Name: "number", + }, { + Name: "object", + Nodes: []*Node{{ + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "true", + }}, + }, { + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "false", + }}, + }, { + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "null", + }}, + }, { + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "string", + }}, + }, { + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "number", + }}, + }, { + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "object", + }}, + }, { + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "array", + }}, + }}, + }, { + Name: "array", + }}, + }}, + }, + ignorePosition: true, + }, { + msg: "bugfix, 100", + text: "100", + node: &Node{ + Name: "json", + Nodes: []*Node{{ + Name: "number", + }}, + }, + ignorePosition: true, + }}) +} + +func TestRandomJSON(t *testing.T) { + j := generateJSON(48) + b, err := json.Marshal(j) + if err != nil { + t.Error(err) + return + } + + buf := bytes.NewBuffer(b) + + s, err := testSyntax("json.p", 0) + if err != nil { + t.Error(err) + return + } + + testParse := func(t *testing.T, buf io.Reader) { + n, err := s.Parse(buf) + if err != nil { + t.Error(err) + return + } + + jback, err := jsonTreeToJSON(n) + if err != nil { + t.Error(err) + return + } + + checkJSON(t, jback, j) + } + + t.Run("unindented", func(t 
*testing.T) { + testParse(t, buf) + }) + + indented := bytes.NewBuffer(nil) + if err := json.Indent(indented, b, "", " "); err != nil { + t.Error(err) + return + } + + t.Run("indented", func(t *testing.T) { + testParse(t, indented) + }) + + indentedTabs := bytes.NewBuffer(nil) + if err := json.Indent(indentedTabs, b, "", "\t"); err != nil { + t.Error(err) + return + } + + t.Run("indented with tabs", func(t *testing.T) { + testParse(t, indentedTabs) + }) +} diff --git a/keyval.p b/keyval.p new file mode 100644 index 0000000..4eff543 --- /dev/null +++ b/keyval.p @@ -0,0 +1,29 @@ +ws:alias = [ \b\f\r\t\v]; +wsnl:alias = ws | "\n"; + +comment-line:alias = "#" [^\n]*; +comment = comment-line (ws* "\n" ws* comment-line)*; + +wsc:alias = ws | comment-line; +wsnlc:alias = wsnl | comment-line; + +quoted:alias = "\"" ([^\\"] | "\\" .)* "\""; +symbol-non-ws:alias = ([^\\"\n=#.\[\] \b\f\r\t\v] | "\\" .)+; +symbol = symbol-non-ws (ws* symbol-non-ws)* | quoted; + +key-form:alias = symbol (ws* "." ws* symbol)*; +key = key-form; +group-key = (comment "\n" ws*)? "[" ws* key-form ws* "]"; + +value-chars:alias = ([^\\"\n=# \b\f\r\t\v] | "\\" .)+; +value = value-chars (ws* value-chars)* | quoted; +key-val = (comment "\n" ws*)? (key | key? ws* "=" ws* value?); + +entry:alias = group-key | key-val; +doc:root = (entry (ws* comment-line)? | wsnlc)*; + +// TODO: not tested +// set as root for streaming: +single-entry = (entry (ws* comment-line)? + | wsnlc* entry (ws* comment-line)?) 
+ []; diff --git a/keyval_test.go b/keyval_test.go new file mode 100644 index 0000000..8c58d96 --- /dev/null +++ b/keyval_test.go @@ -0,0 +1,394 @@ +package parse + +import "testing" + +func TestKeyVal(t *testing.T) { + test(t, "keyval.p", "doc", []testItem{{ + msg: "empty", + }, { + msg: "a comment", + text: "# a comment", + }, { + msg: "a key", + text: "a key", + nodes: []*Node{{ + Name: "key-val", + to: 5, + Nodes: []*Node{{ + Name: "key", + to: 5, + Nodes: []*Node{{ + Name: "symbol", + to: 5, + }}, + }}, + }}, + }, { + msg: "a key with a preceeding whitespace", + text: " a key", + nodes: []*Node{{ + Name: "key-val", + from: 1, + to: 6, + Nodes: []*Node{{ + Name: "key", + from: 1, + to: 6, + Nodes: []*Node{{ + Name: "symbol", + from: 1, + to: 6, + }}, + }}, + }}, + }, { + msg: "a key and a comment", + text: ` + # a comment + + a key + `, + nodes: []*Node{{ + Name: "key-val", + from: 20, + to: 25, + Nodes: []*Node{{ + Name: "key", + from: 20, + to: 25, + Nodes: []*Node{{ + Name: "symbol", + from: 20, + to: 25, + }}, + }}, + }}, + }, { + msg: "a key value pair", + text: "a key = a value", + nodes: []*Node{{ + Name: "key-val", + to: 15, + Nodes: []*Node{{ + Name: "key", + to: 5, + Nodes: []*Node{{ + Name: "symbol", + to: 5, + }}, + }, { + Name: "value", + from: 8, + to: 15, + }}, + }}, + }, { + msg: "key value pairs with a comment at the end of line", + text: ` + a key = a value # a comment + another key = another value # another comment + `, + nodes: []*Node{{ + Name: "key-val", + from: 11, + to: 32, + Nodes: []*Node{{ + Name: "key", + from: 11, + to: 16, + Nodes: []*Node{{ + Name: "symbol", + from: 11, + to: 16, + }}, + }, { + Name: "value", + from: 25, + to: 32, + }}, + }, { + Name: "key-val", + from: 61, + to: 88, + Nodes: []*Node{{ + Name: "key", + from: 61, + to: 72, + Nodes: []*Node{{ + Name: "symbol", + from: 61, + to: 72, + }}, + }, { + Name: "value", + from: 75, + to: 88, + }}, + }}, + }, { + msg: "value without a key", + text: "= a value", + nodes: 
[]*Node{{ + Name: "key-val", + to: 9, + Nodes: []*Node{{ + Name: "value", + from: 2, + to: 9, + }}, + }}, + }, { + msg: "a key value pair with comment", + text: ` + # a comment + a key = a value + `, + nodes: []*Node{{ + Name: "key-val", + from: 4, + to: 34, + Nodes: []*Node{{ + Name: "comment", + from: 4, + to: 15, + }, { + Name: "key", + from: 19, + to: 24, + Nodes: []*Node{{ + Name: "symbol", + from: 19, + to: 24, + }}, + }, { + Name: "value", + from: 27, + to: 34, + }}, + }}, + }, { + msg: "a key with multiple symbols", + text: "a key . with.multiple.symbols=a value", + nodes: []*Node{{ + Name: "key-val", + to: 37, + Nodes: []*Node{{ + Name: "key", + from: 0, + to: 29, + Nodes: []*Node{{ + Name: "symbol", + from: 0, + to: 5, + }, { + Name: "symbol", + from: 8, + to: 12, + }, { + Name: "symbol", + from: 13, + to: 21, + }, { + Name: "symbol", + from: 22, + to: 29, + }}, + }, { + Name: "value", + from: 30, + to: 37, + }}, + }}, + }, { + msg: "a group key", + text: ` + # a comment + [a group key.empty] + `, + nodes: []*Node{{ + Name: "group-key", + from: 4, + to: 38, + Nodes: []*Node{{ + Name: "comment", + from: 4, + to: 15, + }, { + Name: "symbol", + from: 20, + to: 31, + }, { + Name: "symbol", + from: 32, + to: 37, + }}, + }}, + }, { + msg: "a group key with multiple values", + text: ` + [foo.bar.baz] + = one + = two + = three + `, + nodes: []*Node{{ + Name: "group-key", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "value", + }}, + }}, + ignorePosition: true, + }, { + msg: "a group key with multiple values, in a single line", + text: "[foo.bar.baz] = one = two = three", + nodes: []*Node{{ + Name: "group-key", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "key-val", + 
Nodes: []*Node{{ + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "value", + }}, + }}, + ignorePosition: true, + }, { + msg: "full example", + text: ` + # a keyval document + + key1 = foo + key1.a = bar + key1.b = baz + + key2 = qux + + # foo bar baz values + [foo.bar.baz] + a = 1 + b = 2 # even + c = 3 + `, + nodes: []*Node{{ + Name: "key-val", + Nodes: []*Node{{ + Name: "key", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "key", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "key", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "key", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "value", + }}, + }, { + Name: "group-key", + Nodes: []*Node{{ + Name: "comment", + }, { + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "key", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "key", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "key", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "value", + }}, + }}, + ignorePosition: true, + }}) +} diff --git a/mml.p b/mml.p new file mode 100644 index 0000000..f051784 --- /dev/null +++ b/mml.p @@ -0,0 +1,527 @@ +// whitespace is ignored except for \n which is only ignored +// most of the time, but can serve as separator in: +// - list +// - struct +// - function args +// - statements +// - list, struct and function type constraints +ws:alias = " " | "\b" | "\f" | "\r" | "\t" | "\v"; +wsnl:alias = ws | 
"\n"; +wsc:alias = ws | comment; +wsnlc:alias = wsc | "\n"; + +// comments can be line or block comments +line-comment-content = [^\n]*; +line-comment:alias = "//" line-comment-content; +block-comment-content = ([^*] | "*" [^/])*; +block-comment:alias = "/*" block-comment-content "*/"; +comment-part:alias = line-comment | block-comment; +comment = comment-part (ws* "\n"? ws* comment-part)*; + +decimal-digit:alias = [0-9]; +octal-digit:alias = [0-7]; +hexa-digit:alias = [0-9a-fA-F]; + +// interger examples: 42, 0666, 0xfff +decimal:alias = [1-9] decimal-digit*; +octal:alias = "0" octal-digit*; +hexa:alias = "0" [xX] hexa-digit+; +int = decimal | octal | hexa; + +// float examples: .0, 0., 3.14, 1E-12 +exponent:alias = [eE] [+\-]? decimal-digit+; +float = decimal-digit+ "." decimal-digit* exponent? + | "." decimal-digit+ exponent? + | decimal-digit+ exponent; + +// string example: "Hello, world!" +// only \ and " need to be escaped, e.g. allows new lines +// common escaped chars get unescaped, the rest gets unescaped to themselves +string = "\"" ([^\\"] | "\\" .)* "\""; + +true = "true"; +false = "false"; +bool:alias = true | false; + +// symbols normally can have only \w chars: fooBar_baz +// basic symbols cannot start with a digit +// some positions allow strings to be used as symbols, e.g: let "123" 123 +// when this is not possible, dynamic symbols need to be used, but they are +// not allowed in every case, e.g: {symbol(foo()): "bar"} +// TODO: needs decision log for dynamic symbol +// TODO: exclude keywords +// +// dynamic symbol decision log: +// - every value is equatable +// - structs can act as hashtables (optimization is transparent) +// - in structs, must differentiate between symbol and value of a symbol when used as a key +// - js style [a] would be enough for the structs +// - the variables in a scope are like fields in a struct +// - [a] would be ambigous with the list as an expression +// - a logical loophole is closed with symbol(a) +// - 
dynamic-symbols need to be handled differently in match expressions and type expressions +symbol = [a-zA-Z_][a-zA-Z_0-9]*; +static-symbol:alias = symbol | string; +dynamic-symbol = "symbol" wsc* "(" wsnlc* expression wsnlc* ")"; +symbol-expression:alias = static-symbol | dynamic-symbol; + +// TODO: what happens when a dynamic symbol gets exported? + +// list items are separated by comma or new line (or both) +/* + [] + [a, b, c] + [ + a + b + c + ] + [1, 2, a..., [b, c], [d, [e]]...] +*/ +spread-expression = primary-expression wsc* "..."; +list-sep:alias = wsc* ("," | "\n") (wsnlc | ",")*; +list-item:alias = expression | spread-expression; +expression-list:alias = list-item (list-sep list-item)*; + +// list example: [1, 2, 3] +// lists can be constructed with other lists: [l1..., l2...] +list-fact:alias = "[" (wsnlc | ",")* expression-list? (wsnlc | ",")* "]"; +list = list-fact; +mutable-list = "~" wsnlc* list-fact; + +indexer-symbol = "[" wsnlc* expression wsnlc* "]"; +entry = (symbol-expression | indexer-symbol) wsnlc* ":" wsnlc* expression; +entry-list:alias = (entry | spread-expression) (list-sep (entry | spread-expression))*; +struct-fact:alias = "{" (wsnlc | ",")* entry-list? (wsnlc | ",")* "}"; +struct = struct-fact; +mutable-struct = "~" wsnlc* struct-fact; + +channel = "<>" | "<" wsnlc* int wsnlc* ">"; + +and-expression:doc = "and" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")"; +or-expression:doc = "or" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")"; + +// TODO: use collect +argument-list:alias = static-symbol (list-sep static-symbol)*; +collect-symbol = "..." wsnlc* static-symbol; +function-fact:alias = "(" (wsnlc | ",")* + argument-list? + (wsnlc | ",")* + collect-symbol? + (wsnlc | ",")* ")" wsnlc* + expression; +function = "fn" wsnlc* function-fact; // can it ever cause a conflict with call and grouping? 
+effect = "fn" wsnlc* "~" wsnlc* function-fact; + +/* +a[42] +a[3:9] +a[:9] +a[3:] +a[b][c][d] +a.foo +a."foo" +a.symbol(foo) +*/ +range-from = expression; +range-to = expression; +range-expression:alias = range-from? wsnlc* ":" wsnlc* range-to?; +indexer-expression:alias = expression | range-expression; +expression-indexer:alias = primary-expression wsc* "[" wsnlc* indexer-expression wsnlc* "]"; +symbol-indexer:alias = primary-expression wsnlc* "." wsnlc* symbol-expression; // TODO: test with a float on a new line +indexer = expression-indexer | symbol-indexer; + +function-application = primary-expression wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")"; + +if = "if" wsnlc* expression wsnlc* block + (wsnlc* "else" wsnlc* "if" wsnlc* expression wsnlc* block)* + (wsnlc* "else" wsnlc* block)?; + +default = "default" wsnlc* ":"; +default-line:alias = default (wsnlc | ";")* statement?; +case = "case" wsnlc* expression wsnlc* ":"; +case-line:alias = case (wsnlc | ";")* statement?; +switch = "switch" wsnlc* expression? wsnlc* "{" (wsnlc | ";")* + ((case-line | default-line) (sep (case-line | default-line | statement))*)? + (wsnlc | ";")* "}"; +// TODO: empty case not handled + +int-type = "int"; +float-type = "float"; +string-type = "string"; +bool-type = "bool"; +error-type = "error"; + +primitive-type:alias = int-type + | float-type + | string-type + | bool-type + | error-type; + +type-alias-name:alias = static-symbol; + +static-range-from = int; +static-range-to = int; +static-range-expression:alias = static-range-from? wsnlc* ":" wsnlc* static-range-to?; +items-quantifier = int | static-range-expression; +// TODO: maybe this can be confusing with matching constants. Shall we support matching constants, values? + +items-type = items-quantifier + | type-set (wsnlc* ":" wsnlc* items-quantifier)? 
+ | static-symbol wsnlc* type-set (wsnlc* ":" wsnlc* items-quantifier)?; + +destructure-item = type-set | static-symbol wsnlc* type-set; + +collect-destructure-item = "..." wsnlc* destructure-item? + (wsnlc* ":" items-quantifier)?; +list-destructure-type = destructure-item + (list-sep destructure-item)* + (list-sep collect-destructure-item)? + | collect-destructure-item; +list-type-fact:alias = "[" (wsnlc | ",")* + (items-type | list-destructure-type)? + (wsnlc | ",")* "]"; +list-type = list-type-fact; +mutable-list-type = "~" wsnlc* list-type-fact; + +destructure-match-item = match-set + | static-symbol wsnlc* match-set + | static-symbol wsnlc* static-symbol wsnlc* match-set; + +collect-destructure-match-item = "..." wsnlc* destructure-match-item? + (wsnlc* ":" items-quantifier)?; +list-destructure-match = destructure-match-item + (list-sep destructure-match-item)* + (list-sep collect-destructure-match-item)? + | collect-destructure-match-item; +list-match-fact:alias = "[" (wsnlc | ",")* + (list-destructure-match | items-type)? + (wsnlc | ",")* "]"; +list-match = list-match-fact; +mutable-list-match = "~" wsnlc* list-match; + +entry-type = static-symbol (wsnlc* ":" wsnlc* destructure-item)?; +entry-types:alias = entry-type (list-sep entry-type)*; +struct-type-fact:alias = "{" (wsnlc | ",")* entry-types? (wsnlc | ",")* "}"; +struct-type = struct-type-fact; +mutable-struct-type = "~" wsnlc* struct-type-fact; + +entry-match = static-symbol (wsnlc* ":" wsnlc* destructure-match-item)?; +entry-matches:alias = entry-match (list-sep entry-match)*; +struct-match-fact:alias = "{" (wsnlc | ",")* entry-matches? (wsnlc | ",")* "}"; +struct-match = struct-match-fact; +mutable-struct-match = "~" wsnlc* struct-match-fact; + +arg-type = type-set | static-symbol wsnlc* type-set; +args-type:alias = arg-type (list-sep arg-type)*; +function-type-fact:alias = "(" wsnlc* args-type? 
wsnlc* ")" + (wsc* (type-set | static-symbol wsc* type-set))?; +function-type = "fn" wsnlc* function-type-fact; +effect-type = "fn" wsnlc* "~" wsnlc* function-type-fact; + +// TODO: heavy naming crime + +receive-direction = "receive"; +send-direction = "send"; +channel-type = "<" wsnlc* + (receive-direction | send-direction)? wsnlc* + destructure-item? + wsnlc* ">"; + +type-fact-group:alias = "(" wsnlc* type-fact wsnlc* ")"; +type-fact:alias = primitive-type + | type-alias-name + | list-type + | mutable-list-type + | struct-type + | mutable-struct-type + | function-type + | effect-type + | channel-type + | type-fact-group; + +type-set:alias = type-fact (wsnlc* "|" wsnlc* type-fact)*; +type-expression:alias = type-set | static-symbol wsc* type-set; + +match-fact:alias = list-match + | mutable-list-match + | struct-match + | mutable-struct-match; + +match-set:alias = type-set | match-fact; +match-expression:alias = match-set | static-symbol wsc* match-set; + +match-case = "case" wsnlc* match-expression wsnlc* ":"; +match-case-line:alias = match-case (wsnlc | ";")* statement?; +match = "match" wsnlc* expression wsnlc* "{" (wsnlc | ";")* + ((match-case-line | default-line) + (sep (match-case-line | default-line | statement))*)? + (wsnlc | ";")* "}"; + +conditional:alias = if + | switch + | match; + +receive-call = "receive" wsc* "(" (wsnlc | ",")* expression (wsnlc | ",")* ")"; +receive-op = "<-" wsc* primary-expression; +receive-expression-group:alias = "(" wsnlc* receive-expression wsnlc* ")"; +receive-expression:alias = receive-call | receive-op | receive-expression-group; + +receive-assign-capture:alias = assignable wsnlc* ("=" wsnlc*)? receive-expression; +receive-assignment = "set" wsnlc* receive-assign-capture; +receive-assignment-equal = assignable wsnlc* "=" wsnlc* receive-expression; +receive-capture:alias = symbol-expression wsnlc* ("=" wsnlc*)? 
receive-expression; +receive-definition = "let" wsnlc* receive-capture; +receive-mutable-definition = "let" wsnlc* "~" wsnlc* receive-capture; +receive-statement:alias = receive-assignment | receive-definition; + +send-call:alias = "send" wsc* "(" (wsnlc | ",")* expression list-sep expression (wsnlc | ",")* ")"; +send-op:alias = primary-expression wsc* "<-" wsc* expression; +send-call-group:alias = "(" wsnlc* send wsnlc* ")"; +send = send-call | send-op | send-call-group; + +close = "close" wsc* "(" (wsnlc | ",")* expression (wsnlc | ",")* ")"; + +communication-group:alias = "(" wsnlc* communication wsnlc* ")"; +communication:alias = receive-expression | receive-statement | send | communication-group; + +select-case = "case" wsnlc* communication wsnlc* ":"; +select-case-line:alias = select-case (wsnlc | ";")* statement?; +select = "select" wsnlc* "{" (wsnlc | ";")* + ((select-case-line | default-line) + (sep (select-case-line | default-line | statement))*)? + (wsnlc | ";")* "}"; + +go = "go" wsnlc* function-application; + +/* +require . = "mml/foo" +require bar = "mml/foo" +require . "mml/foo" +require bar "mml/foo" +require "mml/foo" +require ( + . = "mml/foo" + bar = "mml/foo" + . "mml/foo" + bar "mml/foo" + "mml/foo" +) +require () +*/ +require-inline = "."; +require-fact = string + | (static-symbol | require-inline) (wsnlc* "=")? wsnlc* string; +require-facts:alias = require-fact (list-sep require-fact)*; +require-statement:alias = "require" wsnlc* require-fact; +require-statement-group:alias = "require" wsc* "(" (wsnlc | ",")* + require-facts? + (wsnlc | ",")* ")"; +require = require-statement | require-statement-group; + +panic = "panic" wsc* "(" (wsnlc | ",")* expression (wsnlc | ",")* ")"; +recover = "recover" wsc* "(" (wsnlc | ",")* ")"; + +block = "{" (wsnlc | ";")* statements? 
(wsnlc | ";")* "}"; +expression-group:alias = "(" wsnlc* expression wsnlc* ")"; + +primary-expression:alias = int + | float + | string + | bool + | symbol + | dynamic-symbol + | list + | mutable-list + | struct + | mutable-struct + | channel + | and-expression // only documentation + | or-expression // only documentation + | function + | effect + | indexer + | function-application // pseudo-expression + | conditional // pseudo-expression + | receive-call + | select // pseudo-expression + | recover + | block // pseudo-expression + | expression-group; + +plus = "+"; +minus = "-"; +logical-not = "!"; +binary-not = "^"; +unary-operator:alias = plus | minus | logical-not | binary-not; +unary-expression = unary-operator wsc* primary-expression | receive-op; + +mul = "*"; +div = "/"; +mod = "%"; +lshift = "<<"; +rshift = ">>"; +binary-and = "&"; +and-not = "&^"; + +add = "+"; +sub = "-"; +binary-or = "|"; +xor = "^"; + +eq = "=="; +not-eq = "!="; +less = "<"; +less-or-eq = "<="; +greater = ">"; +greater-or-eq = ">="; + +logical-and = "&&"; +logical-or = "||"; + +chain = "->"; + +binary-op0:alias = mul | div | mod | lshift | rshift | binary-and | and-not; +binary-op1:alias = add | sub | binary-or | xor; +binary-op2:alias = eq | not-eq | less | less-or-eq | greater | greater-or-eq; +binary-op3:alias = logical-and; +binary-op4:alias = logical-or; +binary-op5:alias = chain; + +operand0:alias = primary-expression | unary-expression; +operand1:alias = operand0 | binary0; +operand2:alias = operand1 | binary1; +operand3:alias = operand2 | binary2; +operand4:alias = operand3 | binary3; +operand5:alias = operand4 | binary4; + +binary0 = operand0 wsc* binary-op0 wsc* operand0; +binary1 = operand1 wsc* binary-op1 wsc* operand1; +binary2 = operand2 wsc* binary-op2 wsc* operand2; +binary3 = operand3 wsc* binary-op3 wsc* operand3; +binary4 = operand4 wsc* binary-op4 wsc* operand4; +binary5 = operand5 wsc* binary-op5 wsc* operand5; + +binary-expression:alias = binary0 | binary1 | binary2 
| binary3 | binary4 | binary5; + +ternary-expression = expression wsnlc* "?" wsnlc* expression wsnlc* ":" wsnlc* expression; + +expression:alias = primary-expression + | unary-expression + | binary-expression + | ternary-expression; + +// TODO: code() +// TODO: observability + +break = "break"; +continue = "continue"; +loop-control:alias = break | continue; + +in-expression = static-symbol wsnlc* "in" wsnlc* (expression | range-expression); +loop-expression = expression | in-expression; +loop = "for" wsnlc* (block | loop-expression wsnlc* block); + +/* +a = b +set c = d +set e f +set ( + g = h + i j +) +*/ +assignable:alias = symbol-expression | indexer; +assign-capture = assignable wsnlc* ("=" wsnlc*)? expression; +assign-set:alias = "set" wsnlc* assign-capture; +assign-equal = assignable wsnlc* "=" wsnlc* expression; +assign-captures:alias = assign-capture (list-sep assign-capture)*; +assign-group:alias = "set" wsnlc* "(" (wsnlc | ",")* assign-captures? (wsnlc | ",")* ")"; +assignment = assign-set | assign-equal | assign-group; + +/* +let a = b +let c d +let ~ e = f +let ~ g h +let ( + i = j + k l + ~ m = n + ~ o p +) +let ~ ( + q = r + s t +) +*/ +value-capture-fact:alias = symbol-expression wsnlc* ("=" wsnlc*)? expression; +value-capture = value-capture-fact; +mutable-capture = "~" wsnlc* value-capture-fact; +value-definition = "let" wsnlc* (value-capture | mutable-capture); +value-captures:alias = value-capture (list-sep value-capture)*; +mixed-captures:alias = (value-capture | mutable-capture) (list-sep (value-capture | mutable-capture))*; +value-definition-group = "let" wsnlc* "(" (wsnlc | ",")* mixed-captures? (wsnlc | ",")* ")"; +mutable-definition-group = "let" wsnlc* "~" wsnlc* "(" (wsnlc | ",")* value-captures? 
(wsnlc | ",")* ")"; + +/* +fn a() b +fn ~ c() d +fn ( + e() f + ~ g() h +) +fn ~ ( + i() + j() +) +*/ +function-definition-fact:alias = static-symbol wsnlc* function-fact; +function-capture = function-definition-fact; +effect-capture = "~" wsnlc* function-definition-fact; +function-definition = "fn" wsnlc* (function-capture | effect-capture); +function-captures:alias = function-capture (list-sep function-capture)*; +mixed-function-captures:alias = (function-capture | effect-capture) + (list-sep (function-capture | effect-capture))*; +function-definition-group = "fn" wsnlc* "(" (wsnlc | ",")* + mixed-function-captures? + (wsnlc | ",")* ")"; +effect-definition-group = "fn" wsnlc* "~" wsnlc* "(" (wsnlc | ",")* + function-captures? + (wsnlc | ",")* ")"; + +definition:alias = value-definition + | value-definition-group + | mutable-definition-group + | function-definition + | function-definition-group + | effect-definition-group; + +// TODO: cannot do: +// type alias a int|fn () string|error +// needs grouping of type-set + +type-alias = "type" wsnlc* "alias" wsnlc* static-symbol wsnlc* type-set; +type-constraint = "type" wsnlc* static-symbol wsnlc* type-set; + +statement-group:alias = "(" wsnlc* statement wsnlc* ")"; + +statement:alias = send + | close + | panic + | require + | loop-control + | go + | loop + | assignment + | definition + | expression + | type-alias + | type-constraint + | statement-group; + +shebang-command = [^\n]*; +shebang = "#!" shebang-command "\n"; +sep:alias = wsc* (";" | "\n") (wsnlc | ";")*; +statements:alias = statement (sep statement)*; +mml:root = shebang? (wsnlc | ";")* statements? 
(wsnlc | ";")*; diff --git a/mml_test.go b/mml_test.go new file mode 100644 index 0000000..19c8678 --- /dev/null +++ b/mml_test.go @@ -0,0 +1,2791 @@ +package parse + +import "testing" + +func TestMML(t *testing.T) { + test(t, "mml.p", "mml", []testItem{{ + msg: "empty", + node: &Node{Name: "mml"}, + }, { + msg: "single line comment", + text: "// foo bar baz", + nodes: []*Node{{ + Name: "comment", + to: 14, + Nodes: []*Node{{ + Name: "line-comment-content", + from: 2, + to: 14, + }}, + }}, + }, { + msg: "multiple line comments", + text: "// foo bar\n// baz qux", + nodes: []*Node{{ + Name: "comment", + to: 21, + Nodes: []*Node{{ + Name: "line-comment-content", + from: 2, + to: 10, + }, { + Name: "line-comment-content", + from: 13, + to: 21, + }}, + }}, + }, { + msg: "block comment", + text: "/* foo bar baz */", + nodes: []*Node{{ + Name: "comment", + to: 17, + Nodes: []*Node{{ + Name: "block-comment-content", + from: 2, + to: 15, + }}, + }}, + }, { + msg: "block comments", + text: "/* foo bar */\n/* baz qux */", + nodes: []*Node{{ + Name: "comment", + to: 27, + Nodes: []*Node{{ + Name: "block-comment-content", + from: 2, + to: 11, + }, { + Name: "block-comment-content", + from: 16, + to: 25, + }}, + }}, + }, { + msg: "mixed comments", + text: "// foo\n/* bar */\n// baz", + nodes: []*Node{{ + Name: "comment", + to: 23, + Nodes: []*Node{{ + Name: "line-comment-content", + from: 2, + to: 6, + }, { + Name: "block-comment-content", + from: 9, + to: 14, + }, { + Name: "line-comment-content", + from: 19, + to: 23, + }}, + }}, + }, { + msg: "int", + text: "42", + nodes: []*Node{{ + Name: "int", + to: 2, + }}, + }, { + msg: "ints", + text: "1; 2; 3", + nodes: []*Node{{ + Name: "int", + to: 1, + }, { + Name: "int", + from: 3, + to: 4, + }, { + Name: "int", + from: 6, + to: 7, + }}, + }, { + msg: "int, octal", + text: "052", + nodes: []*Node{{ + Name: "int", + to: 3, + }}, + }, { + msg: "int, hexa", + text: "0x2a", + nodes: []*Node{{ + Name: "int", + to: 4, + }}, + }, { + msg: 
"float, 0.", + text: "0.", + nodes: []*Node{{ + Name: "float", + to: 2, + }}, + }, { + msg: "float, 72.40", + text: "72.40", + nodes: []*Node{{ + Name: "float", + to: 5, + }}, + }, { + msg: "float, 072.40", + text: "072.40", + nodes: []*Node{{ + Name: "float", + to: 6, + }}, + }, { + msg: "float, 2.71828", + text: "2.71828", + nodes: []*Node{{ + Name: "float", + to: 7, + }}, + }, { + msg: "float, 6.67428e-11", + text: "6.67428e-11", + nodes: []*Node{{ + Name: "float", + to: 11, + }}, + }, { + msg: "float, 1E6", + text: "1E6", + nodes: []*Node{{ + Name: "float", + to: 3, + }}, + }, { + msg: "float, .25", + text: ".25", + nodes: []*Node{{ + Name: "float", + to: 3, + }}, + }, { + msg: "float, .12345E+5", + text: ".12345E+5", + nodes: []*Node{{ + Name: "float", + to: 9, + }}, + }, { + msg: "string, empty", + text: "\"\"", + nodes: []*Node{{ + Name: "string", + to: 2, + }}, + }, { + msg: "string", + text: "\"foo\"", + nodes: []*Node{{ + Name: "string", + to: 5, + }}, + }, { + msg: "string, with new line", + text: "\"foo\nbar\"", + nodes: []*Node{{ + Name: "string", + to: 9, + }}, + }, { + msg: "string, with escaped new line", + text: "\"foo\\nbar\"", + nodes: []*Node{{ + Name: "string", + to: 10, + }}, + }, { + msg: "string, with quotes", + text: "\"foo \\\"bar\\\" baz\"", + nodes: []*Node{{ + Name: "string", + to: 17, + }}, + }, { + msg: "bool, true", + text: "true", + nodes: []*Node{{ + Name: "true", + to: 4, + }}, + }, { + msg: "bool, false", + text: "false", + nodes: []*Node{{ + Name: "false", + to: 5, + }}, + }, { + msg: "symbol", + text: "foo", + nodes: []*Node{{ + Name: "symbol", + to: 3, + }}, + }, { + msg: "dynamic-symbol", + text: "symbol(a)", + nodes: []*Node{{ + Name: "dynamic-symbol", + to: 9, + Nodes: []*Node{{ + Name: "symbol", + from: 7, + to: 8, + }}, + }}, + }, { + msg: "empty list", + text: "[]", + nodes: []*Node{{ + Name: "list", + to: 2, + }}, + }, { + msg: "list", + text: "[a, b, c]", + nodes: []*Node{{ + Name: "list", + to: 9, + Nodes: []*Node{{ + 
Name: "symbol", + from: 1, + to: 2, + }, { + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 7, + to: 8, + }}, + }}, + }, { + msg: "list, new lines", + text: `[ + a + b + c + ]`, + nodes: []*Node{{ + Name: "list", + to: 20, + Nodes: []*Node{{ + Name: "symbol", + from: 5, + to: 6, + }, { + Name: "symbol", + from: 10, + to: 11, + }, { + Name: "symbol", + from: 15, + to: 16, + }}, + }}, + }, { + msg: "list, complex", + text: "[a, b, c..., [d, e], [f, [g]]...]", + nodes: []*Node{{ + Name: "list", + to: 33, + Nodes: []*Node{{ + Name: "symbol", + from: 1, + to: 2, + }, { + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "spread-expression", + from: 7, + to: 11, + Nodes: []*Node{{ + Name: "symbol", + from: 7, + to: 8, + }}, + }, { + Name: "list", + from: 13, + to: 19, + Nodes: []*Node{{ + Name: "symbol", + from: 14, + to: 15, + }, { + Name: "symbol", + from: 17, + to: 18, + }}, + }, { + Name: "spread-expression", + from: 21, + to: 32, + Nodes: []*Node{{ + Name: "list", + from: 21, + to: 29, + Nodes: []*Node{{ + Name: "symbol", + from: 22, + to: 23, + }, { + Name: "list", + from: 25, + to: 28, + Nodes: []*Node{{ + Name: "symbol", + from: 26, + to: 27, + }}, + }}, + }}, + }}, + }}, + }, { + msg: "mutable list", + text: "~[a, b, c]", + nodes: []*Node{{ + Name: "mutable-list", + to: 10, + Nodes: []*Node{{ + Name: "symbol", + from: 2, + to: 3, + }, { + Name: "symbol", + from: 5, + to: 6, + }, { + Name: "symbol", + from: 8, + to: 9, + }}, + }}, + }, { + msg: "empty struct", + text: "{}", + nodes: []*Node{{ + Name: "struct", + to: 2, + }}, + }, { + msg: "struct", + text: "{foo: 1, \"bar\": 2, symbol(baz): 3, [qux]: 4}", + nodes: []*Node{{ + Name: "struct", + to: 44, + Nodes: []*Node{{ + Name: "entry", + from: 1, + to: 7, + Nodes: []*Node{{ + Name: "symbol", + from: 1, + to: 4, + }, { + Name: "int", + from: 6, + to: 7, + }}, + }, { + Name: "entry", + from: 9, + to: 17, + Nodes: []*Node{{ + Name: "string", + from: 9, + to: 14, + }, { + Name: "int", + from: 
16, + to: 17, + }}, + }, { + Name: "entry", + from: 19, + to: 33, + Nodes: []*Node{{ + Name: "dynamic-symbol", + from: 19, + to: 30, + Nodes: []*Node{{ + Name: "symbol", + from: 26, + to: 29, + }}, + }, { + Name: "int", + from: 32, + to: 33, + }}, + }, { + Name: "entry", + from: 35, + to: 43, + Nodes: []*Node{{ + Name: "indexer-symbol", + from: 35, + to: 40, + Nodes: []*Node{{ + Name: "symbol", + from: 36, + to: 39, + }}, + }, { + Name: "int", + from: 42, + to: 43, + }}, + }}, + }}, + }, { + msg: "struct, complex", + text: "{foo: 1, {bar: 2}..., {baz: {}}...}", + nodes: []*Node{{ + Name: "struct", + to: 35, + Nodes: []*Node{{ + Name: "entry", + from: 1, + to: 7, + Nodes: []*Node{{ + Name: "symbol", + from: 1, + to: 4, + }, { + Name: "int", + from: 6, + to: 7, + }}, + }, { + Name: "spread-expression", + from: 9, + to: 20, + Nodes: []*Node{{ + Name: "struct", + from: 9, + to: 17, + Nodes: []*Node{{ + Name: "entry", + from: 10, + to: 16, + Nodes: []*Node{{ + Name: "symbol", + from: 10, + to: 13, + }, { + Name: "int", + from: 15, + to: 16, + }}, + }}, + }}, + }, { + Name: "spread-expression", + from: 22, + to: 34, + Nodes: []*Node{{ + Name: "struct", + from: 22, + to: 31, + Nodes: []*Node{{ + Name: "entry", + from: 23, + to: 30, + Nodes: []*Node{{ + Name: "symbol", + from: 23, + to: 26, + }, { + Name: "struct", + from: 28, + to: 30, + }}, + }}, + }}, + }}, + }}, + }, { + msg: "struct with indexer key", + text: "{[a]: b}", + nodes: []*Node{{ + Name: "struct", + to: 8, + Nodes: []*Node{{ + Name: "entry", + from: 1, + to: 7, + Nodes: []*Node{{ + Name: "indexer-symbol", + from: 1, + to: 4, + Nodes: []*Node{{ + Name: "symbol", + from: 2, + to: 3, + }}, + }, { + Name: "symbol", + from: 6, + to: 7, + }}, + }}, + }}, + }, { + msg: "mutable struct", + text: "~{foo: 1}", + nodes: []*Node{{ + Name: "mutable-struct", + to: 9, + Nodes: []*Node{{ + Name: "entry", + from: 2, + to: 8, + Nodes: []*Node{{ + Name: "symbol", + from: 2, + to: 5, + }, { + Name: "int", + from: 7, + to: 8, + 
}}, + }}, + }}, + }, { + msg: "channel", + text: "<>", + nodes: []*Node{{ + Name: "channel", + to: 2, + }}, + }, { + msg: "buffered channel", + text: "<42>", + nodes: []*Node{{ + Name: "channel", + to: 4, + Nodes: []*Node{{ + Name: "int", + from: 1, + to: 3, + }}, + }}, + }, { + msg: "and expression", + text: "and(a, b, c)", + nodes: []*Node{{ + Name: "function-application", + to: 12, + Nodes: []*Node{{ + Name: "symbol", + to: 3, + }, { + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 7, + to: 8, + }, { + Name: "symbol", + from: 10, + to: 11, + }}, + }}, + }, { + msg: "or expression", + text: "or(a, b, c)", + nodes: []*Node{{ + Name: "function-application", + to: 11, + Nodes: []*Node{{ + Name: "symbol", + to: 2, + }, { + Name: "symbol", + from: 3, + to: 4, + }, { + Name: "symbol", + from: 6, + to: 7, + }, { + Name: "symbol", + from: 9, + to: 10, + }}, + }}, + }, { + msg: "function", + text: "fn () 42", + nodes: []*Node{{ + Name: "function", + to: 8, + Nodes: []*Node{{ + Name: "int", + from: 6, + to: 8, + }}, + }}, + }, { + msg: "function, noop", + text: "fn () {;}", + nodes: []*Node{{ + Name: "function", + to: 9, + Nodes: []*Node{{ + Name: "block", + from: 6, + to: 9, + }}, + }}, + }, { + msg: "function with args", + text: "fn (a, b, c) [a, b, c]", + nodes: []*Node{{ + Name: "function", + to: 22, + Nodes: []*Node{{ + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 7, + to: 8, + }, { + Name: "symbol", + from: 10, + to: 11, + }, { + Name: "list", + from: 13, + to: 22, + Nodes: []*Node{{ + Name: "symbol", + from: 14, + to: 15, + }, { + Name: "symbol", + from: 17, + to: 18, + }, { + Name: "symbol", + from: 20, + to: 21, + }}, + }}, + }}, + }, { + msg: "function with args in new lines", + text: `fn ( + a + b + c + ) [a, b, c]`, + nodes: []*Node{{ + Name: "function", + to: 33, + Nodes: []*Node{{ + Name: "symbol", + from: 8, + to: 9, + }, { + Name: "symbol", + from: 13, + to: 14, + }, { + Name: "symbol", + from: 18, + to: 19, + 
}, { + Name: "list", + from: 24, + to: 33, + Nodes: []*Node{{ + Name: "symbol", + from: 25, + to: 26, + }, { + Name: "symbol", + from: 28, + to: 29, + }, { + Name: "symbol", + from: 31, + to: 32, + }}, + }}, + }}, + }, { + msg: "function with spread arg", + text: "fn (a, b, ...c) [a, b, c]", + nodes: []*Node{{ + Name: "function", + to: 25, + Nodes: []*Node{{ + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 7, + to: 8, + }, { + Name: "collect-symbol", + from: 10, + to: 14, + Nodes: []*Node{{ + Name: "symbol", + from: 13, + to: 14, + }}, + }, { + Name: "list", + from: 16, + to: 25, + Nodes: []*Node{{ + Name: "symbol", + from: 17, + to: 18, + }, { + Name: "symbol", + from: 20, + to: 21, + }, { + Name: "symbol", + from: 23, + to: 24, + }}, + }}, + }}, + }, { + msg: "effect", + text: "fn ~ () 42", + nodes: []*Node{{ + Name: "effect", + to: 10, + Nodes: []*Node{{ + Name: "int", + from: 8, + to: 10, + }}, + }}, + }, { + msg: "indexer", + text: "a[42]", + nodes: []*Node{{ + Name: "indexer", + to: 5, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "int", + from: 2, + to: 4, + }}, + }}, + }, { + msg: "range indexer", + text: "a[3:9]", + nodes: []*Node{{ + Name: "indexer", + to: 6, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "range-from", + from: 2, + to: 3, + Nodes: []*Node{{ + Name: "int", + from: 2, + to: 3, + }}, + }, { + Name: "range-to", + from: 4, + to: 5, + Nodes: []*Node{{ + Name: "int", + from: 4, + to: 5, + }}, + }}, + }}, + }, { + msg: "range indexer, lower unbound", + text: "a[:9]", + nodes: []*Node{{ + Name: "indexer", + to: 5, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "range-to", + from: 3, + to: 4, + Nodes: []*Node{{ + Name: "int", + from: 3, + to: 4, + }}, + }}, + }}, + }, { + msg: "range indexer, upper unbound", + text: "a[3:]", + nodes: []*Node{{ + Name: "indexer", + to: 5, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "range-from", + from: 2, + to: 3, + Nodes: []*Node{{ + 
Name: "int", + from: 2, + to: 3, + }}, + }}, + }}, + }, { + msg: "indexer, chained", + text: "a[b][c][d]", + nodes: []*Node{{ + Name: "indexer", + to: 10, + Nodes: []*Node{{ + Name: "indexer", + to: 7, + Nodes: []*Node{{ + Name: "indexer", + to: 4, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }}, + }, { + Name: "symbol", + from: 5, + to: 6, + }}, + }, { + Name: "symbol", + from: 8, + to: 9, + }}, + }}, + }, { + msg: "symbol indexer", + text: "a.b", + nodes: []*Node{{ + Name: "indexer", + to: 3, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }}, + }}, + }, { + msg: "symbol indexer, with string", + text: "a.\"b\"", + nodes: []*Node{{ + Name: "indexer", + to: 5, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "string", + from: 2, + to: 5, + }}, + }}, + }, { + msg: "symbol indexer, with dynamic symbol", + text: "a.symbol(b)", + nodes: []*Node{{ + Name: "indexer", + to: 11, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "dynamic-symbol", + from: 2, + to: 11, + Nodes: []*Node{{ + Name: "symbol", + from: 9, + to: 10, + }}, + }}, + }}, + }, { + msg: "chained symbol indexer", + text: "a.b.c.d", + nodes: []*Node{{ + Name: "indexer", + to: 7, + Nodes: []*Node{{ + Name: "indexer", + to: 5, + Nodes: []*Node{{ + Name: "indexer", + to: 3, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }}, + }, { + Name: "symbol", + from: 4, + to: 5, + }}, + }, { + Name: "symbol", + from: 6, + to: 7, + }}, + }}, + }, { + msg: "chained symbol indexer on new line", + text: "a\n.b\n.c", + nodes: []*Node{{ + Name: "indexer", + to: 7, + Nodes: []*Node{{ + Name: "indexer", + to: 4, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 3, + to: 4, + }}, + }, { + Name: "symbol", + from: 6, + to: 7, + }}, + }}, + }, { + msg: "chained symbol indexer on new line after dot", + text: "a.\nb.\nc", + nodes: []*Node{{ + 
Name: "indexer", + to: 7, + Nodes: []*Node{{ + Name: "indexer", + to: 4, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 3, + to: 4, + }}, + }, { + Name: "symbol", + from: 6, + to: 7, + }}, + }}, + }, { + msg: "function application", + text: "f()", + nodes: []*Node{{ + Name: "function-application", + to: 3, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }}, + }}, + }, { + msg: "function application, single arg", + text: "f(a)", + nodes: []*Node{{ + Name: "function-application", + to: 4, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }}, + }}, + }, { + msg: "function application, multiple args", + text: "f(a, b, c)", + nodes: []*Node{{ + Name: "function-application", + to: 10, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }, { + Name: "symbol", + from: 5, + to: 6, + }, { + Name: "symbol", + from: 8, + to: 9, + }}, + }}, + }, { + msg: "function application, multiple args, new line", + text: "f(a\nb\nc\n)", + nodes: []*Node{{ + Name: "function-application", + to: 9, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }, { + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 6, + to: 7, + }}, + }}, + }, { + msg: "function application, spread", + text: "f(a, b..., c, d...)", + nodes: []*Node{{ + Name: "function-application", + to: 19, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }, { + Name: "spread-expression", + from: 5, + to: 9, + Nodes: []*Node{{ + Name: "symbol", + from: 5, + to: 6, + }}, + }, { + Name: "symbol", + from: 11, + to: 12, + }, { + Name: "spread-expression", + from: 14, + to: 18, + Nodes: []*Node{{ + Name: "symbol", + from: 14, + to: 15, + }}, + }}, + }}, + }, { + msg: "chained function application", + text: "f(a)(b)(c)", + nodes: []*Node{{ + Name: "function-application", + to: 10, + Nodes: []*Node{{ + Name: 
"function-application", + to: 7, + Nodes: []*Node{{ + Name: "function-application", + to: 4, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }}, + }, { + Name: "symbol", + from: 5, + to: 6, + }}, + }, { + Name: "symbol", + from: 8, + to: 9, + }}, + }}, + }, { + msg: "embedded function application", + text: "f(g(h(a)))", + nodes: []*Node{{ + Name: "function-application", + to: 10, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "function-application", + from: 2, + to: 9, + Nodes: []*Node{{ + Name: "symbol", + from: 2, + to: 3, + }, { + Name: "function-application", + from: 4, + to: 8, + Nodes: []*Node{{ + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 6, + to: 7, + }}, + }}, + }}, + }}, + }, { + msg: "if", + text: "if a { b() }", + nodes: []*Node{{ + Name: "if", + to: 12, + Nodes: []*Node{{ + Name: "symbol", + from: 3, + to: 4, + }, { + Name: "block", + from: 5, + to: 12, + Nodes: []*Node{{ + Name: "function-application", + from: 7, + to: 10, + Nodes: []*Node{{ + Name: "symbol", + from: 7, + to: 8, + }}, + }}, + }}, + }}, + }, { + msg: "if, else", + text: "if a { b } else { c }", + nodes: []*Node{{ + Name: "if", + to: 21, + Nodes: []*Node{{ + Name: "symbol", + from: 3, + to: 4, + }, { + Name: "block", + from: 5, + to: 10, + Nodes: []*Node{{ + Name: "symbol", + from: 7, + to: 8, + }}, + }, { + Name: "block", + from: 16, + to: 21, + Nodes: []*Node{{ + Name: "symbol", + from: 18, + to: 19, + }}, + }}, + }}, + }, { + msg: "if, else if, else if, else", + text: ` + if a { b } + else if c { d } + else if e { f } + else { g } + `, + nodes: []*Node{{ + Name: "if", + from: 4, + to: 66, + Nodes: []*Node{{ + Name: "symbol", + from: 7, + to: 8, + }, { + Name: "block", + from: 9, + to: 14, + Nodes: []*Node{{ + Name: "symbol", + from: 11, + to: 12, + }}, + }, { + Name: "symbol", + from: 26, + to: 27, + }, { + Name: "block", + from: 28, + to: 33, + Nodes: []*Node{{ + Name: "symbol", + from: 30, + to: 
31, + }}, + }, { + Name: "symbol", + from: 45, + to: 46, + }, { + Name: "block", + from: 47, + to: 52, + Nodes: []*Node{{ + Name: "symbol", + from: 49, + to: 50, + }}, + }, { + Name: "block", + from: 61, + to: 66, + Nodes: []*Node{{ + Name: "symbol", + from: 63, + to: 64, + }}, + }}, + }}, + }, { + msg: "switch, empty", + text: "switch {default:}", + nodes: []*Node{{ + Name: "switch", + to: 17, + Nodes: []*Node{{ + Name: "default", + from: 8, + to: 16, + }}, + }}, + }, { + msg: "switch, single case", + text: "switch a {case b: c}", + nodes: []*Node{{ + Name: "switch", + to: 20, + Nodes: []*Node{{ + Name: "symbol", + from: 7, + to: 8, + }, { + Name: "case", + from: 10, + to: 17, + Nodes: []*Node{{ + Name: "symbol", + from: 15, + to: 16, + }}, + }, { + Name: "symbol", + from: 18, + to: 19, + }}, + }}, + }, { + msg: "switch", + text: "switch a {case b: c; case d: e; default: f}", + nodes: []*Node{{ + Name: "switch", + to: 43, + Nodes: []*Node{{ + Name: "symbol", + from: 7, + to: 8, + }, { + Name: "case", + from: 10, + to: 17, + Nodes: []*Node{{ + Name: "symbol", + from: 15, + to: 16, + }}, + }, { + Name: "symbol", + from: 18, + to: 19, + }, { + Name: "case", + from: 21, + to: 28, + Nodes: []*Node{{ + Name: "symbol", + from: 26, + to: 27, + }}, + }, { + Name: "symbol", + from: 29, + to: 30, + }, { + Name: "default", + from: 32, + to: 40, + }, { + Name: "symbol", + from: 41, + to: 42, + }}, + }}, + }, { + msg: "switch, all new lines", + text: `switch + a + { + case + b + : + c + case + d + : + e + default + : + f + }`, + nodes: []*Node{{ + Name: "switch", + to: 87, + Nodes: []*Node{{ + Name: "symbol", + from: 10, + to: 11, + }, { + Name: "case", + from: 20, + to: 34, + Nodes: []*Node{{ + Name: "symbol", + from: 28, + to: 29, + }}, + }, { + Name: "symbol", + from: 38, + to: 39, + }, { + Name: "case", + from: 43, + to: 57, + Nodes: []*Node{{ + Name: "symbol", + from: 51, + to: 52, + }}, + }, { + Name: "symbol", + from: 61, + to: 62, + }, { + Name: "default", + from: 66, + 
to: 78, + }, { + Name: "symbol", + from: 82, + to: 83, + }}, + }}, + }, { + msg: "match expression, empty", + text: "match a {}", + nodes: []*Node{{ + Name: "match", + to: 10, + Nodes: []*Node{{ + Name: "symbol", + from: 6, + to: 7, + }}, + }}, + }, { + msg: "match expression", + text: `match a { + case [first, ...rest]: first + }`, + nodes: []*Node{{ + Name: "match", + to: 45, + Nodes: []*Node{{ + Name: "symbol", + from: 6, + to: 7, + }, { + Name: "match-case", + from: 13, + to: 35, + Nodes: []*Node{{ + Name: "list-type", + from: 18, + to: 34, + Nodes: []*Node{{ + Name: "list-destructure-type", + from: 19, + to: 33, + Nodes: []*Node{{ + Name: "destructure-item", + from: 19, + to: 24, + Nodes: []*Node{{ + Name: "symbol", + from: 19, + to: 24, + }}, + }, { + Name: "collect-destructure-item", + from: 26, + to: 33, + Nodes: []*Node{{ + Name: "destructure-item", + from: 29, + to: 33, + Nodes: []*Node{{ + Name: "symbol", + from: 29, + to: 33, + }}, + }}, + }}, + }}, + }}, + }, { + Name: "symbol", + from: 36, + to: 41, + }}, + }}, + }, { + msg: "match expression, multiple cases", + text: `match a { + case [0]: [] + case [2:]: a[2:] + default: error("invalid length") + }`, + nodes: []*Node{{ + Name: "match", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "match-case", + Nodes: []*Node{{ + Name: "list-type", + Nodes: []*Node{{ + Name: "items-type", + Nodes: []*Node{{ + Name: "items-quantifier", + Nodes: []*Node{{ + Name: "int", + }}, + }}, + }}, + }}, + }, { + Name: "list", + }, { + Name: "match-case", + Nodes: []*Node{{ + Name: "list-type", + Nodes: []*Node{{ + Name: "items-type", + Nodes: []*Node{{ + Name: "items-quantifier", + Nodes: []*Node{{ + Name: "static-range-from", + Nodes: []*Node{{ + Name: "int", + }}, + }}, + }}, + }}, + }}, + }, { + Name: "indexer", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "range-from", + Nodes: []*Node{{ + Name: "int", + }}, + }}, + }, { + Name: "default", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: 
"symbol", + }, { + Name: "string", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "match function", + text: `match a { + case fn () int: a() + default: 42 + }`, + nodes: []*Node{{ + Name: "match", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "match-case", + Nodes: []*Node{{ + Name: "function-type", + Nodes: []*Node{{ + Name: "int-type", + }}, + }}, + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "default", + }, { + Name: "int", + }}, + }}, + ignorePosition: true, + }, { + msg: "match expression, combined", + text: `match a { + case [fn (int)]: a[0]() + default: 42 + }`, + nodes: []*Node{{ + Name: "match", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "match-case", + Nodes: []*Node{{ + Name: "list-type", + Nodes: []*Node{{ + Name: "items-type", + Nodes: []*Node{{ + Name: "function-type", + Nodes: []*Node{{ + Name: "arg-type", + Nodes: []*Node{{ + Name: "int-type", + }}, + }}, + }}, + }}, + }}, + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "indexer", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "int", + }}, + }}, + }, { + Name: "default", + }, { + Name: "int", + }}, + }}, + ignorePosition: true, + }, { + msg: "match expression, complex", + text: `match a { + case [first T int|string, op fn ([T, int, ...T]) int, ...rest T]: + op([first, now(), rest...]) + default: + error("invalid list") + }`, + nodes: []*Node{{ + Name: "match", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "match-case", + Nodes: []*Node{{ + Name: "list-match", + Nodes: []*Node{{ + Name: "list-destructure-match", + Nodes: []*Node{{ + Name: "destructure-match-item", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "int-type", + }, { + Name: "string-type", + }}, + }, { + Name: "destructure-match-item", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "function-type", + Nodes: []*Node{{ + Name: "arg-type", + Nodes: []*Node{{ + Name: "list-type", + Nodes: []*Node{{ + Name: 
"list-destructure-type", + Nodes: []*Node{{ + Name: "destructure-item", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "destructure-item", + Nodes: []*Node{{ + Name: "int-type", + }}, + }, { + Name: "collect-destructure-item", + Nodes: []*Node{{ + Name: "destructure-item", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + }}, + }}, + }, { + Name: "int-type", + }}, + }}, + }, { + Name: "collect-destructure-match-item", + Nodes: []*Node{{ + Name: "destructure-match-item", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + }}, + }}, + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "spread-expression", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + }, { + Name: "default", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "string", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "receive op", + text: "<-chan", + nodes: []*Node{{ + Name: "unary-expression", + Nodes: []*Node{{ + Name: "receive-op", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "send op", + text: "chan <- a", + nodes: []*Node{{ + Name: "send", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + ignorePosition: true, + }, { + msg: "select, empty", + text: `select { + }`, + nodes: []*Node{{ + Name: "select", + to: 12, + }}, + }, { + msg: "select", + text: `select { + case let a <-r: s <- a + case s <- f(): g() + default: h() + }`, + nodes: []*Node{{ + Name: "select", + Nodes: []*Node{{ + Name: "select-case", + Nodes: []*Node{{ + Name: "receive-definition", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "receive-op", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + }, { + Name: "send", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: 
"symbol", + }}, + }, { + Name: "select-case", + Nodes: []*Node{{ + Name: "send", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "default", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "select, call", + text: `select { + case let a receive(r): f() + case send(s, g()): h() + default: i() + }`, + nodes: []*Node{{ + Name: "select", + Nodes: []*Node{{ + Name: "select-case", + Nodes: []*Node{{ + Name: "receive-definition", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "receive-call", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "select-case", + Nodes: []*Node{{ + Name: "send", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "default", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "go", + text: "go f()", + nodes: []*Node{{ + Name: "go", + Nodes: []*Node{{ + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "require, dot, equal", + text: "require . 
= \"mml/foo\"", + nodes: []*Node{{ + Name: "require", + Nodes: []*Node{{ + Name: "require-fact", + Nodes: []*Node{{ + Name: "require-inline", + }, { + Name: "string", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "require, symbol, equal", + text: "require bar = \"mml/foo\"", + nodes: []*Node{{ + Name: "require", + Nodes: []*Node{{ + Name: "require-fact", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "string", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "require, symbol", + text: "require bar \"mml/foo\"", + nodes: []*Node{{ + Name: "require", + Nodes: []*Node{{ + Name: "require-fact", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "string", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "require", + text: "require \"mml/foo\"", + nodes: []*Node{{ + Name: "require", + Nodes: []*Node{{ + Name: "require-fact", + Nodes: []*Node{{ + Name: "string", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "require, group", + text: `require ( + . = "mml/foo" + bar = "mml/foo" + . 
"mml/foo" + bar "mml/foo" + "mml/foo" + )`, + nodes: []*Node{{ + Name: "require", + Nodes: []*Node{{ + Name: "require-fact", + Nodes: []*Node{{ + Name: "require-inline", + }, { + Name: "string", + }}, + }, { + Name: "require-fact", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "string", + }}, + }, { + Name: "require-fact", + Nodes: []*Node{{ + Name: "require-inline", + }, { + Name: "string", + }}, + }, { + Name: "require-fact", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "string", + }}, + }, { + Name: "require-fact", + Nodes: []*Node{{ + Name: "string", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "expression group", + text: "(fn (a) a)(a)", + nodes: []*Node{{ + Name: "function-application", + Nodes: []*Node{{ + Name: "function", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "symbol", + }}, + }}, + ignorePosition: true, + }, { + msg: "unary operator", + text: "!foo", + nodes: []*Node{{ + Name: "unary-expression", + Nodes: []*Node{{ + Name: "logical-not", + }, { + Name: "symbol", + }}, + }}, + ignorePosition: true, + }, { + msg: "binary 0", + text: "a * b", + nodes: []*Node{{ + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }}, + ignorePosition: true, + }, { + msg: "binary 1", + text: "a * b + c * d", + nodes: []*Node{{ + Name: "binary1", + Nodes: []*Node{{ + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }, { + Name: "add", + }, { + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "binary 2", + text: "a * b + c * d == e * f", + nodes: []*Node{{ + Name: "binary2", + Nodes: []*Node{{ + Name: "binary1", + Nodes: []*Node{{ + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }, { + Name: "add", + }, { + Name: "binary0", + 
Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }}, + }, { + Name: "eq", + }, { + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "binary 3, 4, 5", + text: "a * b + c * d == e * f && g || h -> f()", + nodes: []*Node{{ + Name: "binary5", + Nodes: []*Node{{ + Name: "binary4", + Nodes: []*Node{{ + Name: "binary3", + Nodes: []*Node{{ + Name: "binary2", + Nodes: []*Node{{ + Name: "binary1", + Nodes: []*Node{{ + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }, { + Name: "add", + }, { + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }}, + }, { + Name: "eq", + }, { + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }}, + }, { + Name: "logical-and", + }, { + Name: "symbol", + }}, + }, { + Name: "logical-or", + }, { + Name: "symbol", + }}, + }, { + Name: "chain", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "ternary expression", + text: "a ? b : c", + nodes: []*Node{{ + Name: "ternary-expression", + to: 9, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 8, + to: 9, + }}, + }}, + }, { + msg: "multiple ternary expressions, consequence", + text: "a ? b ? 
c : d : e", + nodes: []*Node{{ + Name: "ternary-expression", + to: 17, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "ternary-expression", + from: 4, + to: 13, + Nodes: []*Node{{ + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 8, + to: 9, + }, { + Name: "symbol", + from: 12, + to: 13, + }}, + }, { + Name: "symbol", + from: 16, + to: 17, + }}, + }}, + }, { + msg: "multiple ternary expressions, alternative", + text: "a ? b : c ? d : e", + nodes: []*Node{{ + Name: "ternary-expression", + to: 17, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "ternary-expression", + from: 8, + to: 17, + Nodes: []*Node{{ + Name: "symbol", + from: 8, + to: 9, + }, { + Name: "symbol", + from: 12, + to: 13, + }, { + Name: "symbol", + from: 16, + to: 17, + }}, + }}, + }}, + }, { + msg: "infinite loop", + text: "for {}", + nodes: []*Node{{ + Name: "loop", + Nodes: []*Node{{ + Name: "block", + }}, + }}, + ignorePosition: true, + }, { + msg: "conditional loop", + text: "for foo {}", + nodes: []*Node{{ + Name: "loop", + Nodes: []*Node{{ + Name: "loop-expression", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "block", + }}, + }}, + ignorePosition: true, + }, { + msg: "in list loop", + text: "for i in [1, 2, 3] {}", + nodes: []*Node{{ + Name: "loop", + Nodes: []*Node{{ + Name: "loop-expression", + Nodes: []*Node{{ + Name: "in-expression", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "list", + Nodes: []*Node{{ + Name: "int", + }, { + Name: "int", + }, { + Name: "int", + }}, + }}, + }}, + }, { + Name: "block", + }}, + }}, + ignorePosition: true, + }, { + msg: "in range loop", + text: "for i in -3:42 {}", + nodes: []*Node{{ + Name: "loop", + Nodes: []*Node{{ + Name: "loop-expression", + Nodes: []*Node{{ + Name: "in-expression", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "range-from", + Nodes: []*Node{{ + Name: "unary-expression", + Nodes: []*Node{{ + Name: "minus", + }, { + 
Name: "int", + }}, + }}, + }, { + Name: "range-to", + Nodes: []*Node{{ + Name: "int", + }}, + }}, + }}, + }, { + Name: "block", + }}, + }}, + ignorePosition: true, + }, { + msg: "loop control", + text: `for i in l { + if i % 2 == 0 { + break + } + }`, + nodes: []*Node{{ + Name: "loop", + Nodes: []*Node{{ + Name: "loop-expression", + Nodes: []*Node{{ + Name: "in-expression", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }, { + Name: "block", + Nodes: []*Node{{ + Name: "if", + Nodes: []*Node{{ + Name: "binary2", + Nodes: []*Node{{ + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mod", + }, { + Name: "int", + }}, + }, { + Name: "eq", + }, { + Name: "int", + }}, + }, { + Name: "block", + Nodes: []*Node{{ + Name: "break", + }}, + }}, + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "assign, eq", + text: "a = b", + nodes: []*Node{{ + Name: "assignment", + Nodes: []*Node{{ + Name: "assign-equal", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "assign, set, eq", + text: "set a = b", + nodes: []*Node{{ + Name: "assignment", + Nodes: []*Node{{ + Name: "assign-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "assign, set", + text: "set a b", + nodes: []*Node{{ + Name: "assignment", + Nodes: []*Node{{ + Name: "assign-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "assign, group", + text: `set ( + a = b + c d + )`, + nodes: []*Node{{ + Name: "assignment", + Nodes: []*Node{{ + Name: "assign-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "assign-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define, eq", + text: "let a = b", + nodes: []*Node{{ + Name: 
"value-definition", + Nodes: []*Node{{ + Name: "value-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define", + text: "let a b", + nodes: []*Node{{ + Name: "value-definition", + Nodes: []*Node{{ + Name: "value-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define mutable, eq", + text: "let ~ a = b", + nodes: []*Node{{ + Name: "value-definition", + Nodes: []*Node{{ + Name: "mutable-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define mutable", + text: "let ~ a b", + nodes: []*Node{{ + Name: "value-definition", + Nodes: []*Node{{ + Name: "mutable-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "mixed define group", + text: `let ( + a = b + c d + ~ e f + ~ g h + )`, + nodes: []*Node{{ + Name: "value-definition-group", + Nodes: []*Node{{ + Name: "value-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "value-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "mutable-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "mutable-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "mutable define group", + text: `let ~ ( + a = b + c d + )`, + nodes: []*Node{{ + Name: "mutable-definition-group", + Nodes: []*Node{{ + Name: "value-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "value-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define function", + text: "fn a() b", + nodes: []*Node{{ + Name: "function-definition", + Nodes: 
[]*Node{{ + Name: "function-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define effect", + text: "fn ~ a() b", + nodes: []*Node{{ + Name: "function-definition", + Nodes: []*Node{{ + Name: "effect-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define function group", + text: `fn ( + a() b + ~ c() d + )`, + nodes: []*Node{{ + Name: "function-definition-group", + Nodes: []*Node{{ + Name: "function-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "effect-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define effect group", + text: `fn ~ ( + a() b + c() d + )`, + nodes: []*Node{{ + Name: "effect-definition-group", + Nodes: []*Node{{ + Name: "function-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "function-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "type constraint", + text: ` + type a fn ([]) int + fn a(l) len(l) + `, + nodes: []*Node{{ + Name: "type-constraint", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "function-type", + Nodes: []*Node{{ + Name: "arg-type", + Nodes: []*Node{{ + Name: "list-type", + }}, + }, { + Name: "int-type", + }}, + }}, + }, { + Name: "function-definition", + Nodes: []*Node{{ + Name: "function-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "type alias", + text: "type alias a int|(fn () int|string)|string", + nodes: []*Node{{ + Name: "type-alias", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "int-type", + }, { + Name: 
"function-type", + Nodes: []*Node{{ + Name: "int-type", + }, { + Name: "string-type", + }}, + }, { + Name: "string-type", + }}, + }}, + ignorePosition: true, + }, { + msg: "statement group", + text: "(for {})", + nodes: []*Node{{ + Name: "loop", + Nodes: []*Node{{ + Name: "block", + }}, + }}, + ignorePosition: true, + }}) +} diff --git a/next_test.go b/next_test.go new file mode 100644 index 0000000..6439f75 --- /dev/null +++ b/next_test.go @@ -0,0 +1,740 @@ +package parse + +import ( + "bytes" + "io" + "os" + "testing" + "time" +) + +type testItem struct { + msg string + text string + fail bool + node *Node + nodes []*Node + ignorePosition bool +} + +func testSyntaxReader(r io.Reader, traceLevel int) (*Syntax, error) { + trace := NewTrace(0) + + b, err := bootSyntax(trace) + if err != nil { + return nil, err + } + + doc, err := b.Parse(r) + if err != nil { + return nil, err + } + + trace = NewTrace(traceLevel) + s := NewSyntax(trace) + if err := define(s, doc); err != nil { + return nil, err + } + + if err := s.Init(); err != nil { + return nil, err + } + + return s, nil +} + +func testSyntaxString(s string, traceLevel int) (*Syntax, error) { + return testSyntaxReader(bytes.NewBufferString(s), traceLevel) +} + +func testSyntax(file string, traceLevel int) (*Syntax, error) { + f, err := os.Open(file) + if err != nil { + return nil, err + } + + defer f.Close() + return testSyntaxReader(f, traceLevel) +} + +func checkNodesPosition(t *testing.T, left, right []*Node, position bool) { + if len(left) != len(right) { + t.Error("length doesn't match", len(left), len(right)) + return + } + + for len(left) > 0 { + checkNodePosition(t, left[0], right[0], position) + if t.Failed() { + return + } + + left, right = left[1:], right[1:] + } +} + +func checkNodePosition(t *testing.T, left, right *Node, position bool) { + if (left == nil) != (right == nil) { + t.Error("nil reference doesn't match", left == nil, right == nil) + return + } + + if left == nil { + return + } + + if 
left.Name != right.Name { + t.Error("name doesn't match", left.Name, right.Name) + return + } + + if position && left.from != right.from { + t.Error("from doesn't match", left.Name, left.from, right.from) + return + } + + if position && left.to != right.to { + t.Error("to doesn't match", left.Name, left.to, right.to) + return + } + + if len(left.Nodes) != len(right.Nodes) { + t.Error("length doesn't match", left.Name, len(left.Nodes), len(right.Nodes)) + t.Log(left) + t.Log(right) + for { + if len(left.Nodes) > 0 { + t.Log("<", left.Nodes[0]) + left.Nodes = left.Nodes[1:] + } + + if len(right.Nodes) > 0 { + t.Log(">", right.Nodes[0]) + right.Nodes = right.Nodes[1:] + } + + if len(left.Nodes) == 0 && len(right.Nodes) == 0 { + break + } + } + return + } + + checkNodesPosition(t, left.Nodes, right.Nodes, position) +} + +func checkNodes(t *testing.T, left, right []*Node) { + checkNodesPosition(t, left, right, true) +} + +func checkNode(t *testing.T, left, right *Node) { + checkNodePosition(t, left, right, true) +} + +func checkNodesIgnorePosition(t *testing.T, left, right []*Node) { + checkNodesPosition(t, left, right, false) +} + +func checkNodeIgnorePosition(t *testing.T, left, right *Node) { + checkNodePosition(t, left, right, false) +} + +func testReaderTrace(t *testing.T, r io.Reader, rootName string, traceLevel int, tests []testItem) { + s, err := testSyntaxReader(r, traceLevel) + if err != nil { + t.Error(err) + return + } + + start := time.Now() + defer func() { t.Log("\ntotal duration", time.Since(start)) }() + + for _, ti := range tests { + t.Run(ti.msg, func(t *testing.T) { + n, err := s.Parse(bytes.NewBufferString(ti.text)) + + if ti.fail && err == nil { + t.Error("failed to fail") + return + } else if !ti.fail && err != nil { + t.Error(err) + return + } else if ti.fail { + return + } + + t.Log(n) + + cn := checkNode + if ti.ignorePosition { + cn = checkNodeIgnorePosition + } + + if ti.node != nil { + cn(t, n, ti.node) + } else { + cn(t, n, &Node{ + Name: 
rootName, + from: 0, + to: len(ti.text), + Nodes: ti.nodes, + }) + } + }) + } +} + +func testStringTrace(t *testing.T, s string, traceLevel int, tests []testItem) { + testReaderTrace(t, bytes.NewBufferString(s), "", traceLevel, tests) +} + +func testString(t *testing.T, s string, tests []testItem) { + testStringTrace(t, s, 0, tests) +} + +func testTrace(t *testing.T, file, rootName string, traceLevel int, tests []testItem) { + f, err := os.Open(file) + if err != nil { + t.Error(err) + return + } + + defer f.Close() + testReaderTrace(t, f, rootName, traceLevel, tests) +} + +func test(t *testing.T, file, rootName string, tests []testItem) { + testTrace(t, file, rootName, 0, tests) +} + +func TestRecursion(t *testing.T) { + testString( + t, + `A = "a" | A "a"`, + []testItem{{ + msg: "recursion in choice, right, left, commit", + text: "aaa", + node: &Node{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + }}, + }}, + }, + ignorePosition: true, + }}, + ) + + testString( + t, + `A = "a" | "a" A`, + []testItem{{ + msg: "recursion in choice, right, right, commit", + text: "aaa", + node: &Node{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + }}, + }}, + }, + ignorePosition: true, + }}, + ) + + testString( + t, + `A = "a" A | "a"`, + []testItem{{ + msg: "recursion in choice, left, right, commit", + text: "aaa", + node: &Node{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + }}, + }}, + }, + ignorePosition: true, + }}, + ) + + testString( + t, + `A = A "a" | "a"`, + []testItem{{ + msg: "recursion in choice, left, left, commit", + text: "aaa", + node: &Node{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + }}, + }}, + }, + ignorePosition: true, + }}, + ) + + testString( + t, + `A':alias = "a" | A' "a"; A = A'`, + []testItem{{ + msg: "recursion in choice, right, left, alias", + text: "aaa", + node: &Node{ + Name: "A", + to: 3, + }, + }}, + ) + + testString( + 
t, + `A':alias = "a" | "a" A'; A = A'`, + []testItem{{ + msg: "recursion in choice, right, right, alias", + text: "aaa", + node: &Node{ + Name: "A", + to: 3, + }, + }}, + ) + + testString( + t, + `A':alias = "a" A' | "a"; A = A'`, + []testItem{{ + msg: "recursion in choice, left, right, alias", + text: "aaa", + node: &Node{ + Name: "A", + to: 3, + }, + }}, + ) + + testString( + t, + `A':alias = A' "a" | "a"; A = A'`, + []testItem{{ + msg: "recursion in choice, left, left, alias", + text: "aaa", + node: &Node{ + Name: "A", + to: 3, + }, + }}, + ) +} + +func TestSequence(t *testing.T) { + testString( + t, + `AB = "a" | "a"? "a"? "b" "b"`, + []testItem{{ + msg: "sequence with optional items", + text: "abb", + node: &Node{ + Name: "AB", + to: 3, + }, + }, { + msg: "sequence with optional items, none", + text: "bb", + node: &Node{ + Name: "AB", + to: 2, + }, + }}, + ) + + testString( + t, + `A = "a" | (A?)*`, + []testItem{{ + msg: "sequence in choice with redundant quantifier", + text: "aaa", + node: &Node{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + }, { + Name: "A", + }, { + Name: "A", + }}, + }, + ignorePosition: true, + }}, + ) + + testString( + t, + `A = ("a"*)*`, + []testItem{{ + msg: "sequence with redundant quantifier", + text: "aaa", + node: &Node{ + Name: "A", + to: 3, + }, + }}, + ) +} + +func TestQuantifiers(t *testing.T) { + testString( + t, + `A = "a" "b"{0} "a"`, + []testItem{{ + msg: "zero", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero, fail", + text: "aba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{1} "a"`, + []testItem{{ + msg: "one, missing", + text: "aa", + fail: true, + }, { + msg: "one", + text: "aba", + node: &Node{ + Name: "A", + to: 3, + }, + }, { + msg: "one, too much", + text: "abba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{3} "a"`, + []testItem{{ + msg: "three, missing", + text: "abba", + fail: true, + }, { + msg: "three", + text: "abbba", + node: &Node{ + Name: "A", + 
to: 5, + }, + }, { + msg: "three, too much", + text: "abbbba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{0,1} "a"`, + []testItem{{ + msg: "zero or one explicit, missing", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero or one explicit", + text: "aba", + node: &Node{ + Name: "A", + to: 3, + }, + }, { + msg: "zero or one explicit, too much", + text: "abba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{,1} "a"`, + []testItem{{ + msg: "zero or one explicit, omit zero, missing", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero or one explicit, omit zero", + text: "aba", + node: &Node{ + Name: "A", + to: 3, + }, + }, { + msg: "zero or one explicit, omit zero, too much", + text: "abba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"? "a"`, + []testItem{{ + msg: "zero or one explicit, shortcut, missing", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero or one explicit, shortcut", + text: "aba", + node: &Node{ + Name: "A", + to: 3, + }, + }, { + msg: "zero or one explicit, shortcut, too much", + text: "abba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{0,3} "a"`, + []testItem{{ + msg: "zero or three, missing", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero or three", + text: "abba", + node: &Node{ + Name: "A", + to: 4, + }, + }, { + msg: "zero or three", + text: "abbba", + node: &Node{ + Name: "A", + to: 5, + }, + }, { + msg: "zero or three, too much", + text: "abbbba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{,3} "a"`, + []testItem{{ + msg: "zero or three, omit zero, missing", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero or three, omit zero", + text: "abba", + node: &Node{ + Name: "A", + to: 4, + }, + }, { + msg: "zero or three, omit zero", + text: "abbba", + node: &Node{ + Name: "A", + to: 5, + }, + }, { + msg: "zero or three, omit zero, too much", + 
text: "abbbba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{1,3} "a"`, + []testItem{{ + msg: "one or three, missing", + text: "aa", + fail: true, + }, { + msg: "one or three", + text: "abba", + node: &Node{ + Name: "A", + to: 4, + }, + }, { + msg: "one or three", + text: "abbba", + node: &Node{ + Name: "A", + to: 5, + }, + }, { + msg: "one or three, too much", + text: "abbbba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{3,5} "a"`, + []testItem{{ + msg: "three or five, missing", + text: "abba", + fail: true, + }, { + msg: "three or five", + text: "abbbba", + node: &Node{ + Name: "A", + to: 6, + }, + }, { + msg: "three or five", + text: "abbbbba", + node: &Node{ + Name: "A", + to: 7, + }, + }, { + msg: "three or five, too much", + text: "abbbbbba", + fail: true, + }}, + ) + + testStringTrace( + t, + `A = "a" "b"{0,} "a"`, + 1, + []testItem{{ + msg: "zero or more, explicit, missing", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero or more, explicit", + text: "abba", + node: &Node{ + Name: "A", + to: 4, + }, + }}, + ) + + testStringTrace( + t, + `A = "a" "b"* "a"`, + 1, + []testItem{{ + msg: "zero or more, shortcut, missing", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero or more, shortcut", + text: "abba", + node: &Node{ + Name: "A", + to: 4, + }, + }}, + ) + + testStringTrace( + t, + `A = "a" "b"{1,} "a"`, + 1, + []testItem{{ + msg: "one or more, explicit, missing", + text: "aa", + fail: true, + }, { + msg: "one or more, explicit", + text: "abba", + node: &Node{ + Name: "A", + to: 4, + }, + }}, + ) + + testStringTrace( + t, + `A = "a" "b"+ "a"`, + 1, + []testItem{{ + msg: "one or more, shortcut, missing", + text: "aa", + fail: true, + }, { + msg: "one or more, shortcut", + text: "abba", + node: &Node{ + Name: "A", + to: 4, + }, + }}, + ) + + testStringTrace( + t, + `A = "a" "b"{3,} "a"`, + 1, + []testItem{{ + msg: "three or more, explicit, missing", + text: "abba", + fail: true, 
+ }, { + msg: "three or more, explicit", + text: "abbbba", + node: &Node{ + Name: "A", + to: 6, + }, + }}, + ) +} diff --git a/node.go b/node.go new file mode 100644 index 0000000..438e334 --- /dev/null +++ b/node.go @@ -0,0 +1,89 @@ +package parse + +import "fmt" + +type Node struct { + Name string + Nodes []*Node + commitType CommitType + from, to int + tokens []rune +} + +func newNode(name string, ct CommitType, from, to int) *Node { + return &Node{ + Name: name, + commitType: ct, + from: from, + to: to, + } +} + +func (n *Node) tokenLength() int { + return n.to - n.from +} + +func (n *Node) nodeLength() int { + return len(n.Nodes) +} + +func findNode(in, n *Node) { + if n == in { + panic(fmt.Errorf("found self in %s", in.Name)) + } + + for _, ni := range n.Nodes { + findNode(in, ni) + } +} + +func (n *Node) append(p *Node) { + findNode(n, p) + n.Nodes = append(n.Nodes, p) + // TODO: check rather if n.from <= p.from??? or panic if less? or check rather node length and commit + // happens in the end anyway? + if n.from == 0 && n.to == 0 { + n.from = p.from + } + + n.to = p.to +} + +func (n *Node) clear() { + n.from = 0 + n.to = 0 + n.Nodes = nil +} + +func (n *Node) applyTokens(t []rune) { + n.tokens = t + for _, ni := range n.Nodes { + ni.applyTokens(t) + } +} + +func (n *Node) commit() { + var nodes []*Node + for _, ni := range n.Nodes { + ni.commit() + if ni.commitType&Alias != 0 { + nodes = append(nodes, ni.Nodes...) 
// stringsContain reports whether s is equal to at least one element of ss.
// A nil or empty ss never contains anything.
func stringsContain(ss []string, s string) bool {
	for i := 0; i < len(ss); i++ {
		if ss[i] == s {
			return true
		}
	}

	return false
}
// parser returns the parser belonging to this quantifier definition,
// creating and registering it on first use.
//
// It panics with errCannotIncludeParsers when the definition's own name is
// already on path. When the registry already holds a parser under this name,
// that parser is returned as is. Otherwise a new quantifierParser is stored
// in the registry, and the quantified item is resolved: first as an existing
// parser, then by building it from its definition. An item that has neither
// a parser nor a definition results in a parserNotFound error.
//
// NOTE(review): path is handed to the item's definition unchanged, whereas
// sequenceDefinition.parser appends its own name first — confirm that the
// difference is intended.
func (d *quantifierDefinition) parser(r *registry, path []string) (parser, error) {
	if stringsContain(path, d.name) {
		panic(errCannotIncludeParsers)
	}

	p, ok := r.parser(d.name)
	if ok {
		return p, nil
	}

	qp := &quantifierParser{
		name:   d.name,
		commit: d.commit,
		min:    d.min,
		max:    d.max,
	}

	// stored before the item is resolved, so that a lookup of this name made
	// while the item is being built finds qp instead of building it again
	r.setParser(qp)

	item, ok := r.parser(d.item)
	if !ok {
		itemDefinition, ok := r.definition(d.item)
		if !ok {
			return nil, parserNotFound(d.item)
		}

		var err error
		item, err = itemDefinition.parser(r, path)
		if err != nil {
			return nil, err
		}
	}

	qp.item = item
	return qp, nil
}
(..)*a | .(..)*a + +func (p *quantifierParser) setIncludedBy(i parser, path []string) { + if stringsContain(path, p.name) { + panic(errCannotIncludeParsers) + } + + p.includedBy = append(p.includedBy, i) +} + +func (p *quantifierParser) cacheIncluded(*context, *Node) { + panic(errCannotIncludeParsers) +} + +func (p *quantifierParser) parse(t Trace, c *context) { + t = t.Extend(p.name) + t.Out1("parsing quantifier", c.offset) + + if p.commit&Documentation != 0 { + t.Out1("fail, doc") + c.fail(c.offset) + return + } + + if c.excluded(c.offset, p.name) { + t.Out1("excluded") + c.fail(c.offset) + return + } + + c.exclude(c.offset, p.name) + defer c.include(c.offset, p.name) + + node := newNode(p.name, p.commit, c.offset, c.offset) + + // this way of checking the cache definitely needs the testing of the russ cox form + for { + if p.max >= 0 && node.nodeLength() == p.max { + t.Out1("success, max reached") + c.cache.set(node.from, p.name, node) + for _, i := range p.includedBy { + i.cacheIncluded(c, node) + } + + c.success(node) + return + } + + t.Out2("next quantifier item") + + // n, m, ok := c.cache.get(c.offset, p.item.nodeName()) + m, ok := c.fromCache(p.item.nodeName()) + if ok { + t.Out1("quantifier item found in cache, match:", m, c.offset, c.node.tokenLength()) + if m { + node.append(c.node) + if c.node.tokenLength() > 0 { + t.Out2("taking next after cached found") + continue + } + } + + if node.nodeLength() >= p.min { + t.Out1("success, no more match") + c.cache.set(node.from, p.name, node) + for _, i := range p.includedBy { + i.cacheIncluded(c, node) + } + + c.success(node) + } else { + t.Out1("fail, min not reached") + c.cache.set(node.from, p.name, nil) + c.fail(node.from) + } + + return + } + + p.item.parse(t, c) + if !c.match || c.node.tokenLength() == 0 { + if node.nodeLength() >= p.min { + t.Out1("success, no more match") + c.cache.set(node.from, p.name, node) + for _, i := range p.includedBy { + i.cacheIncluded(c, node) + } + + c.success(node) + } else 
{ + t.Out1("fail, min not reached") + c.cache.set(node.from, p.name, nil) + c.fail(node.from) + } + + return + } + + node.append(c.node) + } +} diff --git a/registry.go b/registry.go new file mode 100644 index 0000000..09160ad --- /dev/null +++ b/registry.go @@ -0,0 +1,36 @@ +package parse + +type registry struct { + definitions map[string]definition + parsers map[string]parser +} + +func newRegistry() *registry { + return ®istry{ + definitions: make(map[string]definition), + parsers: make(map[string]parser), + } +} + +func (r *registry) definition(name string) (definition, bool) { + d, ok := r.definitions[name] + return d, ok +} + +func (r *registry) parser(name string) (parser, bool) { + p, ok := r.parsers[name] + return p, ok +} + +func (r *registry) setDefinition(d definition) error { + if _, ok := r.definitions[d.nodeName()]; ok { + return duplicateDefinition(d.nodeName()) + } + + r.definitions[d.nodeName()] = d + return nil +} + +func (r *registry) setParser(p parser) { + r.parsers[p.nodeName()] = p +} diff --git a/scheme.p b/scheme.p new file mode 100644 index 0000000..28cf423 --- /dev/null +++ b/scheme.p @@ -0,0 +1,14 @@ +// TODO: comment + +ws:alias = [ \b\f\n\r\t\v]; +comment:alias = ";" [^\n]*; +wsc:alias = ws | comment; +number = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? 
[0-9]+)?; +string = "\"" ([^\\"] | "\\" .)* "\""; +symbol = ([^\\ \n\t\b\f\r\v\"()\[\]#] | "\\" .)+; +list-form:alias = "(" wsc* (expression wsc*)* ")" + | "[" wsc* (expression wsc*)* "]"; +list = list-form; +vector = "#" list-form; +expression:alias = number | string | symbol | list; +scheme = wsc* (expression wsc*)*; diff --git a/scheme_test.go b/scheme_test.go new file mode 100644 index 0000000..1946466 --- /dev/null +++ b/scheme_test.go @@ -0,0 +1,84 @@ +package parse + +import "testing" + +func TestScheme(t *testing.T) { + test(t, "scheme.p", "scheme", []testItem{{ + msg: "empty", + }, { + msg: "a function", + text: ` + (define (foo a b c) + (let ([bar (+ a b c)] + [baz (- a b c)]) + (* bar baz))) + `, + nodes: []*Node{{ + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "list", + Nodes: []*Node{{ + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }, { + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + }, { + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + }}, + ignorePosition: true, + }}) +} diff --git a/sequence.go b/sequence.go new file mode 100644 index 0000000..0539e62 --- /dev/null +++ b/sequence.go @@ -0,0 +1,187 @@ +package parse + +type sequenceDefinition struct { + name string + commit CommitType + items []string +} + +type sequenceParser struct { + name string + commit CommitType + items []parser + including []parser +} + +func newSequence(name string, ct 
CommitType, items []string) *sequenceDefinition { + return &sequenceDefinition{ + name: name, + commit: ct, + items: items, + } +} + +func (d *sequenceDefinition) nodeName() string { return d.name } + +func (d *sequenceDefinition) parser(r *registry, path []string) (parser, error) { + if stringsContain(path, d.name) { + panic(errCannotIncludeParsers) + } + + p, ok := r.parser(d.name) + if ok { + return p, nil + } + + sp := &sequenceParser{ + name: d.name, + commit: d.commit, + } + + r.setParser(sp) + + var items []parser + path = append(path, d.name) + for _, name := range d.items { + item, ok := r.parser(name) + if ok { + items = append(items, item) + continue + } + + itemDefinition, ok := r.definition(name) + if !ok { + return nil, parserNotFound(name) + } + + item, err := itemDefinition.parser(r, path) + if err != nil { + return nil, err + } + + items = append(items, item) + } + + // for single items, acts like a choice + if len(items) == 1 { + items[0].setIncludedBy(sp, path) + } + + sp.items = items + return sp, nil +} + +func (d *sequenceDefinition) commitType() CommitType { + return d.commit +} + +func (p *sequenceParser) nodeName() string { return p.name } + +func (p *sequenceParser) setIncludedBy(i parser, path []string) { + if stringsContain(path, p.name) { + return + } + + p.including = append(p.including, i) +} + +func (p *sequenceParser) cacheIncluded(c *context, n *Node) { + if !c.excluded(n.from, p.name) { + return + } + + nc := newNode(p.name, p.commit, n.from, n.to) + nc.append(n) + c.cache.set(nc.from, p.name, nc) + + // maybe it is enough to cache only those that are on the path + for _, i := range p.including { + i.cacheIncluded(c, nc) + } +} + +/* +should be possible to parse: + +a = "0" +b = "1" +c = a* e b +d = a | c +e = b | d + +input: 111 +*/ + +func (p *sequenceParser) parse(t Trace, c *context) { + t = t.Extend(p.name) + t.Out1("parsing sequence", c.offset) + + if p.commit&Documentation != 0 { + t.Out1("fail, doc") + c.fail(c.offset) + 
return + } + + // TODO: maybe we can check the cache here? no because that would exclude the continuations + + if c.excluded(c.offset, p.name) { + t.Out1("excluded") + c.fail(c.offset) + return + } + + c.exclude(c.offset, p.name) + defer c.include(c.offset, p.name) + + items := p.items + node := newNode(p.name, p.commit, c.offset, c.offset) + + for len(items) > 0 { + t.Out2("next sequence item") + // n, m, ok := c.cache.get(c.offset, items[0].nodeName()) + m, ok := c.fromCache(items[0].nodeName()) + if ok { + t.Out1("sequence item found in cache, match:", m, items[0].nodeName(), c.offset) + if m { + t.Out2("sequence item from cache:", c.node.Name, len(c.node.Nodes), c.node.from) + node.append(c.node) + items = items[1:] + continue + } + + c.cache.set(node.from, p.name, nil) + c.fail(node.from) + return + } + + items[0].parse(t, c) + items = items[1:] + + if !c.match { + t.Out1("fail, item failed") + c.cache.set(node.from, p.name, nil) + c.fail(node.from) + return + } + + if c.node.tokenLength() > 0 { + t.Out2("appending sequence item", c.node.Name, len(c.node.Nodes)) + node.append(c.node) + } + } + + t.Out1("success, items parsed") + t.Out2("nodes", node.nodeLength()) + if node.Name == "group" { + t.Out2("caching group", node.from, node.Nodes[2].Name, node.Nodes[2].nodeLength()) + } + + // is this cached item ever taken? + c.cache.set(node.from, p.name, node) + for _, i := range p.including { + i.cacheIncluded(c, node) + } + + t.Out2("caching sequence and included by done") + c.success(node) +} diff --git a/sexpr.p b/sexpr.p new file mode 100644 index 0000000..5ac70c8 --- /dev/null +++ b/sexpr.p @@ -0,0 +1,9 @@ +ws:alias = [ \b\f\n\r\t\v]; +comment:alias = ";" [^\n]*; +wsc:alias = ws | comment; +number = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? 
// CommitType is a set of bit flags attached to a definition. The flags are
// tested with a bitwise AND, e.g. ct&Alias != 0.
type CommitType int

const (
	// None is the zero value: no flag is set.
	None CommitType = 0

	// Alias marks a definition whose node is replaced by its own children
	// when a parsed tree is committed. The root definition cannot be an alias.
	//
	// The iota expression starts on the second line of this block, where iota
	// is already 1, so the value of Alias is 2 (not 1).
	Alias CommitType = 1 << iota

	// Documentation marks a definition whose parser fails without trying to
	// match. Its value is 4.
	Documentation

	// Root marks the definition that is used as the entry point of the
	// syntax. Its value is 8.
	Root
)
ErrNoParsersDefined = errors.New("no parsers defined") + ErrInvalidInput = errors.New("invalid input") + ErrInvalidCharacter = errors.New("invalid character") // two use cases: utf8 and boot + ErrUnexpectedCharacter = errors.New("unexpected character") + ErrInvalidSyntax = errors.New("invalid syntax") + ErrRootAlias = errors.New("root node cannot be an alias") +) + +func duplicateDefinition(name string) error { + return fmt.Errorf("duplicate definition: %s", name) +} + +func NewSyntax(t Trace) *Syntax { + if t == nil { + t = NewTrace(0) + } + + return &Syntax{ + trace: t, + registry: newRegistry(), + } +} + +func (s *Syntax) register(d definition) error { + if s.initialized { + return ErrSyntaxInitialized + } + + if d.commitType()&Root != 0 { + s.root = d + s.rootSet = true + } else if !s.rootSet { + s.root = d + } + + return s.registry.setDefinition(d) +} + +func (s *Syntax) AnyChar(name string, ct CommitType) error { + return s.register(newChar(name, ct, true, false, nil, nil)) +} + +func (s *Syntax) Class(name string, ct CommitType, not bool, chars []rune, ranges [][]rune) error { + return s.register(newChar(name, ct, false, not, chars, ranges)) +} + +func childName(name string, childIndex int) string { + return fmt.Sprintf("%s:%d", name, childIndex) +} + +func (s *Syntax) CharSequence(name string, ct CommitType, chars []rune) error { + var refs []string + for i, ci := range chars { + ref := childName(name, i) + refs = append(refs, ref) + if err := s.register(newChar(ref, Alias, false, false, []rune{ci}, nil)); err != nil { + return err + } + } + + return s.Sequence(name, ct, refs...) 
+} + +func (s *Syntax) Quantifier(name string, ct CommitType, item string, min, max int) error { + return s.register(newQuantifier(name, ct, item, min, max)) +} + +func (s *Syntax) Sequence(name string, ct CommitType, items ...string) error { + return s.register(newSequence(name, ct, items)) +} + +func (s *Syntax) Choice(name string, ct CommitType, elements ...string) error { + return s.register(newChoice(name, ct, elements)) +} + +func (s *Syntax) Read(r io.Reader) error { + if s.initialized { + return ErrSyntaxInitialized + } + + return nil +} + +func (s *Syntax) Init() error { + if s.initFailed { + return ErrInitFailed + } + + if s.initialized { + return nil + } + + if s.root == nil { + return ErrNoParsersDefined + } + + if s.root.commitType()&Alias != 0 { + return ErrRootAlias + } + + var err error + s.parser, err = s.root.parser(s.registry, nil) + if err != nil { + s.initFailed = true + return err + } + + s.initialized = true + return nil +} + +func (s *Syntax) Generate(w io.Writer) error { + if err := s.Init(); err != nil { + return err + } + + return nil +} + +func (s *Syntax) Parse(r io.Reader) (*Node, error) { + if err := s.Init(); err != nil { + return nil, err + } + + c := newContext(bufio.NewReader(r)) + return parse(s.trace, s.parser, c) +} diff --git a/syntax.p b/syntax.p new file mode 100644 index 0000000..87a483a --- /dev/null +++ b/syntax.p @@ -0,0 +1,78 @@ +ws:alias = " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v"; +wsc:alias = ws | comment; + +block-comment:alias = "/*" ("*" [^/] | [^*])* "*/"; +line-comment:alias = "//" [^\n]*; +comment-segment:alias = line-comment | block-comment; +ws-no-nl:alias = " " | "\t" | "\b" | "\f" | "\r" | "\v"; +comment = comment-segment (ws-no-nl* "\n"? ws-no-nl* comment-segment)*; + +any-char = "."; // equivalent to [^] + +// TODO: document matching terminal: [] + +// TODO: handle char class equivalences + +// TODO: enable streaming + +// TODO: set route function in generated code? 
+ +// caution: newline is accepted +class-not = "^"; +class-char = [^\\\[\]\^\-] | "\\" .; +char-range = class-char "-" class-char; +char-class = "[" class-not? (class-char | char-range)* "]"; + +// caution: newline is accepted +sequence-char = [^\\"] | "\\" .; +char-sequence = "\"" sequence-char* "\""; + +// TODO: this can be mixed up with sequence. Is it fine? fix this, see mml symbol +terminal:alias = any-char | char-class | char-sequence; + +symbol = [^\\ \n\t\b\f\r\v/.\[\]\"{}\^+*?|():=;]+; + +group:alias = "(" wsc* expression wsc* ")"; + +number:alias = [0-9]+; +count = number; +count-quantifier = "{" wsc* count wsc* "}"; +range-from = number; +range-to = number; +range-quantifier = "{" wsc* range-from? wsc* "," wsc* range-to? wsc* "}"; +one-or-more = "+"; +zero-or-more = "*"; +zero-or-one = "?"; +quantity:alias = count-quantifier + | range-quantifier + | one-or-more + | zero-or-more + | zero-or-one; + +quantifier = (terminal | symbol | group) wsc* quantity; + +item:alias = terminal | symbol | group | quantifier; +sequence = item (wsc* item)+; + +element:alias = terminal | symbol | group | quantifier | sequence; + +// DOC: once cached, doesn't try again, even in a new context, therefore the order may matter +choice = element (wsc* "|" wsc* element)+; + +// DOC: not having 'not' needs some tricks sometimes + +expression:alias = terminal + | symbol + | group + | quantifier + | sequence + | choice; + +alias = "alias"; +doc = "doc"; +root = "root"; +flag:alias = alias | doc | root; +definition = symbol (":" flag)* wsc* "=" wsc* expression; + +definitions:alias = definition (wsc* ";" (wsc | ";")* definition)*; +syntax:root = (wsc | ";")* definitions? 
// Trace receives diagnostic messages from the parsers. Out, Out1, Out2 and
// Out3 report a message at level 0, 1, 2 and 3 respectively. Extend returns
// a trace for a nested scope identified by the given name.
type Trace interface {
	Out(...interface{})
	Out1(...interface{})
	Out2(...interface{})
	Out3(...interface{})
	Extend(string) Trace
}

// DefaultTrace is a Trace that writes to the standard error. Every message is
// prefixed with the path of the trace, and messages above the configured
// level are dropped.
type DefaultTrace struct {
	level int
	path  string
}

// NopTrace is a Trace that discards every message.
type NopTrace struct{}

// NewTrace returns a DefaultTrace with the path "/" that prints the messages
// whose level is not higher than level.
func NewTrace(level int) *DefaultTrace {
	root := new(DefaultTrace)
	root.level = level
	root.path = "/"
	return root
}

// printlnLevel prints the path of the trace followed by a, as a single line
// on the standard error, unless l is higher than the level of the trace.
func (t *DefaultTrace) printlnLevel(l int, a ...interface{}) {
	if l <= t.level {
		line := make([]interface{}, 0, len(a)+1)
		line = append(line, t.path)
		line = append(line, a...)
		fmt.Fprintln(os.Stderr, line...)
	}
}

// Out reports a message at level 0.
func (t *DefaultTrace) Out(a ...interface{}) { t.printlnLevel(0, a...) }

// Out1 reports a message at level 1.
func (t *DefaultTrace) Out1(a ...interface{}) { t.printlnLevel(1, a...) }

// Out2 reports a message at level 2.
func (t *DefaultTrace) Out2(a ...interface{}) { t.printlnLevel(2, a...) }

// Out3 reports a message at level 3.
func (t *DefaultTrace) Out3(a ...interface{}) { t.printlnLevel(3, a...) }

// Extend returns a new DefaultTrace with the same level, whose path is the
// path of t with name added as a further segment. The receiver is not
// modified.
func (t *DefaultTrace) Extend(name string) Trace {
	separator := "/"
	if t.path == "/" {
		// the root path already ends with the separator
		separator = ""
	}

	return &DefaultTrace{
		level: t.level,
		path:  t.path + separator + name,
	}
}

// Out discards the message.
func (NopTrace) Out(...interface{}) {}

// Out1 discards the message.
func (NopTrace) Out1(...interface{}) {}

// Out2 discards the message.
func (NopTrace) Out2(...interface{}) {}

// Out3 discards the message.
func (NopTrace) Out3(...interface{}) {}

// Extend returns the receiver: a NopTrace keeps no path.
func (t NopTrace) Extend(string) Trace { return t }