diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0e1b440 --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +SOURCES = $(shell find . -name '*.go') + +default: build + +imports: + @goimports -w $(SOURCES) + +build: $(SOURCES) + go build ./... + +check: build + go test ./... -test.short -run ^Test + +fmt: $(SOURCES) + @gofmt -w -s $(SOURCES) + +precommit: build check fmt diff --git a/boot.go b/boot.go new file mode 100644 index 0000000..e68bc6b --- /dev/null +++ b/boot.go @@ -0,0 +1,211 @@ +package parse + +import ( + "errors" + "os" + "strconv" +) + +var errInvalidDefinition = errors.New("invalid syntax definition") + +func stringToCommitType(s string) CommitType { + switch s { + case "alias": + return Alias + case "doc": + return Documentation + case "root": + return Root + default: + return None + } +} + +func checkBootDefinitionLength(d []string) error { + if len(d) < 3 { + return errInvalidDefinition + } + + switch d[0] { + case "chars", "class": + if len(d) < 4 { + return errInvalidDefinition + } + + case "quantifier": + if len(d) != 6 { + return errInvalidDefinition + } + + case "sequence", "choice": + if len(d) < 4 { + return errInvalidDefinition + } + } + + return nil +} + +func parseClass(c []rune) (not bool, chars []rune, ranges [][]rune, err error) { + if len(c) > 0 && c[0] == '^' { + not = true + c = c[1:] + } + + for { + if len(c) == 0 { + return + } + + var c0 rune + c0, c = c[0], c[1:] + switch c0 { + case '[', ']', '^', '-': + err = errInvalidDefinition + return + } + + if c0 == '\\' { + if len(c) == 0 { + err = errInvalidDefinition + return + } + + c0, c = unescapeChar(c[0]), c[1:] + } + + if len(c) < 2 || c[0] != '-' { + chars = append(chars, c0) + continue + } + + var c1 rune + c1, c = c[1], c[2:] + if c1 == '\\' { + if len(c) == 0 { + err = errInvalidDefinition + return + } + + c1, c = unescapeChar(c[0]), c[1:] + } + + ranges = append(ranges, []rune{c0, c1}) + } +} + +func defineBootAnything(s *Syntax, d []string) error { + ct := 
stringToCommitType(d[2]) + return s.AnyChar(d[1], ct) +} + +func defineBootClass(s *Syntax, d []string) error { + ct := stringToCommitType(d[2]) + + not, chars, ranges, err := parseClass([]rune(d[3])) + if err != nil { + return err + } + + return s.Class(d[1], ct, not, chars, ranges) +} + +func defineBootCharSequence(s *Syntax, d []string) error { + ct := stringToCommitType(d[2]) + + chars, err := unescape('\\', []rune{'"', '\\'}, []rune(d[3])) + if err != nil { + return err + } + + return s.CharSequence(d[1], ct, chars) +} + +func defineBootQuantifier(s *Syntax, d []string) error { + ct := stringToCommitType(d[2]) + + var ( + min, max int + err error + ) + + if min, err = strconv.Atoi(d[4]); err != nil { + return err + } + + if max, err = strconv.Atoi(d[5]); err != nil { + return err + } + + return s.Quantifier(d[1], ct, d[3], min, max) +} + +func defineBootSequence(s *Syntax, d []string) error { + ct := stringToCommitType(d[2]) + return s.Sequence(d[1], ct, d[3:]...) +} + +func defineBootChoice(s *Syntax, d []string) error { + ct := stringToCommitType(d[2]) + return s.Choice(d[1], ct, d[3:]...) 
+} + +func defineBoot(s *Syntax, d []string) error { + switch d[0] { + case "anything": + return defineBootAnything(s, d) + case "class": + return defineBootClass(s, d) + case "chars": + return defineBootCharSequence(s, d) + case "quantifier": + return defineBootQuantifier(s, d) + case "sequence": + return defineBootSequence(s, d) + case "choice": + return defineBootChoice(s, d) + default: + return errInvalidDefinition + } +} + +func defineAllBoot(s *Syntax, defs [][]string) error { + for _, d := range defs { + if err := defineBoot(s, d); err != nil { + return err + } + } + + return nil +} + +func initBoot(t Trace, definitions [][]string) (*Syntax, error) { + s := NewSyntax(t) + if err := defineAllBoot(s, definitions); err != nil { + return nil, err + } + + return s, s.Init() +} + +func bootSyntax(t Trace) (*Syntax, error) { + b, err := initBoot(t, bootDefinitions) + if err != nil { + return nil, err + } + + f, err := os.Open("syntax.p") + if err != nil { + return nil, err + } + + defer f.Close() + + doc, err := b.Parse(f) + if err != nil { + return nil, err + } + + s := NewSyntax(t) + return s, define(s, doc) +} diff --git a/boot_test.go b/boot_test.go new file mode 100644 index 0000000..0e85f68 --- /dev/null +++ b/boot_test.go @@ -0,0 +1,73 @@ +package parse + +import ( + "os" + "testing" +) + +func TestBoot(t *testing.T) { + var trace Trace + // trace = NewTrace(2) + + b, err := initBoot(trace, bootDefinitions) + if err != nil { + t.Error(err) + return + } + + f, err := os.Open("syntax.p") + if err != nil { + t.Error(err) + return + } + + defer f.Close() + + n0, err := b.Parse(f) + if err != nil { + t.Error(err) + return + } + + s0 := NewSyntax(trace) + if err := define(s0, n0); err != nil { + t.Error(err) + } + + _, err = f.Seek(0, 0) + if err != nil { + t.Error(err) + return + } + + n1, err := s0.Parse(f) + if err != nil { + t.Error(err) + return + } + + checkNode(t, n1, n0) + if t.Failed() { + return + } + + s1 := NewSyntax(trace) + if err := define(s1, n1); 
err != nil { + t.Error(err) + return + } + + _, err = f.Seek(0, 0) + if err != nil { + t.Error(err) + return + } + + n2, err := s1.Parse(f) + if err != nil { + t.Error(err) + return + } + + checkNode(t, n2, n1) +} diff --git a/bootsyntax.go b/bootsyntax.go new file mode 100644 index 0000000..2bcf746 --- /dev/null +++ b/bootsyntax.go @@ -0,0 +1,285 @@ +package parse + +var bootDefinitions = [][]string{{ + "chars", "space", "alias", " ", +}, { + "chars", "tab", "alias", "\\t", +}, { + "chars", "nl", "alias", "\\n", +}, { + "chars", "backspace", "alias", "\\b", +}, { + "chars", "formfeed", "alias", "\\f", +}, { + "chars", "carryreturn", "alias", "\\r", +}, { + "chars", "verticaltab", "alias", "\\v", +}, { + "choice", + "ws", + "alias", + "space", + "tab", + "nl", + "backspace", + "formfeed", + "carryreturn", + "verticaltab", +}, { + "chars", "open-block-comment", "alias", "/*", +}, { + "chars", "close-block-comment", "alias", "*/", +}, { + "chars", "star", "alias", "*", +}, { + "class", "not-slash", "alias", "^/", +}, { + "class", "not-star", "alias", "^*", +}, { + "chars", "double-slash", "alias", "//", +}, { + "class", "not-nl", "alias", "^\\n", +}, { + "sequence", "not-block-close", "alias", "star", "not-slash", +}, { + "choice", "block-comment-char", "alias", "not-block-close", "not-star", +}, { + "quantifier", "block-comment-body", "alias", "block-comment-char", "0", "-1", +}, { + "sequence", + "block-comment", + "alias", + "open-block-comment", + "block-comment-body", + "close-block-comment", +}, { + "quantifier", "not-nls", "alias", "not-nl", "0", "-1", +}, { + "sequence", "line-comment", "alias", "double-slash", "not-nls", +}, { + "choice", "comment-segment", "alias", "block-comment", "line-comment", +}, { + "quantifier", "wss", "alias", "ws", "0", "-1", +}, { + "quantifier", "optional-nl", "alias", "nl", "0", "1", +}, { + "choice", + "ws-no-nl", + "alias", + "space", + "tab", + "backspace", + "formfeed", + "carryreturn", + "verticaltab", +}, { + "sequence", + 
"continue-comment-segment", + "alias", + "ws-no-nl", + "optional-nl", + "ws-no-nl", + "comment-segment", +}, { + "quantifier", "continue-comment", "alias", "continue-comment-segment", "0", "-1", +}, { + "sequence", + "comment", + "none", + "comment-segment", + "continue-comment", +}, { + "choice", "wsc", "alias", "ws", "comment", +}, { + "quantifier", "wscs", "alias", "wsc", "0", "-1", +}, { + "anything", "anything", "alias", +}, { + "chars", "any-char", "none", ".", +}, { + "chars", "open-square", "alias", "[", +}, { + "chars", "close-square", "alias", "]", +}, { + "chars", "class-not", "none", "^", +}, { + "chars", "dash", "alias", "-", +}, { + "quantifier", "optional-class-not", "alias", "class-not", "0", "1", +}, { + "class", "not-class-control", "alias", "^\\\\\\[\\]\\^\\-", +}, { + "chars", "escape", "alias", "\\\\", +}, { + "sequence", "escaped-char", "alias", "escape", "anything", +}, { + "choice", "class-char", "none", "not-class-control", "escaped-char", +}, { + "sequence", "char-range", "none", "class-char", "dash", "class-char", +}, { + "choice", "char-or-range", "alias", "class-char", "char-range", +}, { + "quantifier", "chars-or-ranges", "alias", "char-or-range", "0", "-1", +}, { + "sequence", "char-class", "none", "open-square", "optional-class-not", "chars-or-ranges", "close-square", +}, { + "chars", "double-quote", "alias", "\\\"", +}, { + "class", "not-char-sequence-control", "alias", "^\\\\\"", +}, { + "choice", "sequence-char", "none", "not-char-sequence-control", "escaped-char", +}, { + "quantifier", "char-sequence-chars", "alias", "sequence-char", "0", "-1", +}, { + "sequence", "char-sequence", "none", "double-quote", "char-sequence-chars", "double-quote", +}, { + "choice", "terminal", "alias", "any-char", "char-class", "char-sequence", +}, { + "class", "symbol-char", "alias", "^\\\\ \\n\\t\\b\\f\\r\\v\\b/.\\[\\]\\\"{}\\^+*?|():=;", +}, { + "quantifier", "symbol-chars", "alias", "symbol-char", "1", "-1", +}, { + "sequence", "symbol", "none", 
"symbol-chars", +}, { + "chars", "open-paren", "alias", "(", +}, { + "chars", "close-paren", "alias", ")", +}, { + "sequence", "group", "alias", "open-paren", "wscs", "expression", "wscs", "close-paren", +}, { + "chars", "open-brace", "alias", "{", +}, { + "chars", "close-brace", "alias", "}", +}, { + "class", "digit", "alias", "0-9", +}, { + "quantifier", "number", "alias", "digit", "1", "-1", +}, { + "sequence", "count", "none", "number", +}, { + "sequence", "count-quantifier", "none", "open-brace", "wscs", "count", "wscs", "close-brace", +}, { + "sequence", "range-from", "none", "number", +}, { + "sequence", "range-to", "none", "number", +}, { + "chars", "comma", "alias", ",", +}, { + "sequence", + "range-quantifier", + "none", + "open-brace", + "wscs", + "range-from", + "wscs", + "comma", + "wscs", + "range-to", + "close-brace", +}, { + "chars", "one-or-more", "none", "+", +}, { + "chars", "zero-or-more", "none", "*", +}, { + "chars", "zero-or-one", "none", "?", +}, { + "choice", + "quantity", + "alias", + "count-quantifier", + "range-quantifier", + "one-or-more", + "zero-or-more", + "zero-or-one", +}, { + "choice", "quantifiable", "alias", "terminal", "symbol", "group", +}, { + "sequence", "quantifier", "none", "quantifiable", "wscs", "quantity", +}, { + "choice", "item", "alias", "terminal", "symbol", "group", "quantifier", +}, { + "sequence", "item-continue", "alias", "wscs", "item", +}, { + "quantifier", "items-continue", "alias", "item-continue", "0", "-1", +}, { + "sequence", "sequence", "none", "item", "items-continue", +}, { + "choice", "element", "alias", "terminal", "symbol", "group", "quantifier", "sequence", +}, { + "chars", "pipe", "alias", "|", +}, { + "sequence", "element-continue", "alias", "wscs", "pipe", "wscs", "element", +}, { + "quantifier", "elements-continue", "alias", "element-continue", "1", "-1", +}, { + "sequence", "choice", "none", "element", "elements-continue", +}, { + "choice", + "expression", + "alias", + "terminal", + "symbol", 
+ "group", + "quantifier", + "sequence", + "choice", +}, { + "chars", "alias", "none", "alias", +}, { + "chars", "doc", "none", "doc", +}, { + "chars", "root", "none", "root", +}, { + "choice", "flag", "alias", "alias", "doc", "root", +}, { + "chars", "colon", "alias", ":", +}, { + "sequence", "flag-tag", "alias", "colon", "flag", +}, { + "quantifier", "flags", "alias", "flag-tag", "0", "-1", +}, { + "chars", "equal", "alias", "=", +}, { + "sequence", "definition", "none", "symbol", "flags", "wscs", "equal", "wscs", "expression", +}, { + "chars", "semicolon", "alias", ";", +}, { + "choice", "wsc-or-semicolon", "alias", "wsc", "semicolon", +}, { + "quantifier", "wsc-or-semicolons", "alias", "wsc-or-semicolon", "0", "-1", +}, { + "sequence", + "subsequent-definition", + "alias", + "wscs", + "semicolon", + "wsc-or-semicolons", + "definition", +}, { + "quantifier", + "subsequent-definitions", + "alias", + "subsequent-definition", + "0", + "-1", +}, { + "sequence", + "definitions", + "alias", + "definition", + "subsequent-definitions", +}, { + "quantifier", + "opt-definitions", + "alias", + "definitions", + "0", + "1", +}, { + "sequence", + "syntax", + "root", + "wsc-or-semicolons", + "opt-definitions", + "wsc-or-semicolons", +}} diff --git a/cache.go b/cache.go new file mode 100644 index 0000000..6ab6028 --- /dev/null +++ b/cache.go @@ -0,0 +1,94 @@ +package parse + +type cacheItem struct { + name string + node *Node +} + +type tokenCache struct { + match []*cacheItem // TODO: potential optimization can be to use a balanced binary tree + noMatch []string +} + +type cache struct { + tokens []*tokenCache // TODO: try with pointers, too +} + +func (c *cache) get(offset int, name string) (*Node, bool, bool) { + if len(c.tokens) <= offset { + return nil, false, false + } + + tc := c.tokens[offset] + if tc == nil { + return nil, false, false + } + + for _, i := range tc.noMatch { + if i == name { + return nil, false, true + } + } + + for _, i := range tc.match { + if i.name 
== name { + return i.node, true, true + } + } + + return nil, false, false +} + +func (c *cache) setOne(offset int, name string, n *Node) { +} + +func (c *cache) set(offset int, name string, n *Node) { + if len(c.tokens) <= offset { + if cap(c.tokens) > offset { + c.tokens = c.tokens[:offset+1] + } else { + c.tokens = c.tokens[:cap(c.tokens)] + for len(c.tokens) <= offset { + c.tokens = append(c.tokens, nil) + } + } + } + + tc := c.tokens[offset] + if tc == nil { + tc = &tokenCache{} + c.tokens[offset] = tc + } + + if n == nil { + for _, i := range tc.match { + if i.name == name { + return + } + } + + for _, i := range tc.noMatch { + if i == name { + return + } + } + + tc.noMatch = append(tc.noMatch, name) + return + } + + for _, i := range tc.match { + if i.name == name { + if n.tokenLength() > i.node.tokenLength() { + i.node = n + } + + return + } + } + + tc.match = append(tc.match, &cacheItem{ + name: name, + node: n, + }) +} diff --git a/char.go b/char.go new file mode 100644 index 0000000..5f61636 --- /dev/null +++ b/char.go @@ -0,0 +1,108 @@ +package parse + +type charParser struct { + name string + commit CommitType + any bool + not bool + chars []rune + ranges [][]rune + includedBy []parser +} + +func newChar( + name string, + ct CommitType, + any, not bool, + chars []rune, + ranges [][]rune, +) *charParser { + return &charParser{ + name: name, + commit: ct, + any: any, + not: not, + chars: chars, + ranges: ranges, + } +} + +func (p *charParser) nodeName() string { return p.name } + +func (p *charParser) parser(r *registry, path []string) (parser, error) { + if stringsContain(path, p.name) { + panic(errCannotIncludeParsers) + } + + r.setParser(p) + return p, nil +} + +func (p *charParser) commitType() CommitType { + return p.commit +} + +func (p *charParser) setIncludedBy(i parser, path []string) { + if stringsContain(path, p.name) { + panic(errCannotIncludeParsers) + } + + p.includedBy = append(p.includedBy, i) +} + +func (p *charParser) 
cacheIncluded(*context, *Node) { + panic(errCannotIncludeParsers) +} + +func (p *charParser) match(t rune) bool { + if p.any { + return true + } + + for _, ci := range p.chars { + if ci == t { + return !p.not + } + } + + for _, ri := range p.ranges { + if t >= ri[0] && t <= ri[1] { + return !p.not + } + } + + return p.not +} + +func (p *charParser) parse(t Trace, c *context) { + t = t.Extend(p.name) + t.Out1("parsing char", c.offset) + + if p.commit&Documentation != 0 { + t.Out1("fail, doc") + c.fail(c.offset) + return + } + + if m, ok := c.fromCache(p.name); ok { + t.Out1("found in cache, match:", m) + return + } + + if tok, ok := c.token(); ok && p.match(tok) { + t.Out1("success", string(tok)) + n := newNode(p.name, p.commit, c.offset, c.offset+1) + c.cache.set(c.offset, p.name, n) + for _, i := range p.includedBy { + i.cacheIncluded(c, n) + } + + c.success(n) + return + } else { + t.Out1("fail", string(tok)) + c.cache.set(c.offset, p.name, nil) + c.fail(c.offset) + return + } +} diff --git a/choice.go b/choice.go new file mode 100644 index 0000000..253e182 --- /dev/null +++ b/choice.go @@ -0,0 +1,180 @@ +package parse + +type choiceDefinition struct { + name string + commit CommitType + elements []string +} + +type choiceParser struct { + name string + commit CommitType + elements []parser + including []parser +} + +func newChoice(name string, ct CommitType, elements []string) *choiceDefinition { + return &choiceDefinition{ + name: name, + commit: ct, + elements: elements, + } +} + +func (d *choiceDefinition) nodeName() string { return d.name } + +// could store and cache everything that it fulfils + +func (d *choiceDefinition) parser(r *registry, path []string) (parser, error) { + p, ok := r.parser(d.name) + if ok { + return p, nil + } + + cp := &choiceParser{ + name: d.name, + commit: d.commit, + } + + r.setParser(cp) + + var elements []parser + path = append(path, d.name) + for _, e := range d.elements { + element, ok := r.parser(e) + if ok { + elements = 
append(elements, element) + element.setIncludedBy(cp, path) + continue + } + + elementDefinition, ok := r.definition(e) + if !ok { + return nil, parserNotFound(e) + } + + element, err := elementDefinition.parser(r, path) + if err != nil { + return nil, err + } + + element.setIncludedBy(cp, path) + elements = append(elements, element) + } + + cp.elements = elements + return cp, nil +} + +func (d *choiceDefinition) commitType() CommitType { + return d.commit +} + +func (p *choiceParser) nodeName() string { return p.name } + +func (p *choiceParser) setIncludedBy(i parser, path []string) { + if stringsContain(path, p.name) { + return + } + + p.including = append(p.including, i) +} + +func (p *choiceParser) cacheIncluded(c *context, n *Node) { + if !c.excluded(n.from, p.name) { + return + } + + nc := newNode(p.name, p.commit, n.from, n.to) + nc.append(n) + c.cache.set(nc.from, p.name, nc) + + // maybe it is enough to cache only those that are on the path + for _, i := range p.including { + i.cacheIncluded(c, nc) + } +} + +func (p *choiceParser) parse(t Trace, c *context) { + t = t.Extend(p.name) + t.Out1("parsing choice", c.offset) + + if p.commit&Documentation != 0 { + t.Out1("fail, doc") + c.fail(c.offset) + return + } + + if m, ok := c.fromCache(p.name); ok { + t.Out1("found in cache, match:", m) + return + } + + if c.excluded(c.offset, p.name) { + t.Out1("excluded") + c.fail(c.offset) + return + } + + c.exclude(c.offset, p.name) + defer c.include(c.offset, p.name) + + node := newNode(p.name, p.commit, c.offset, c.offset) + var match bool + + for { + elements := p.elements + var foundMatch bool + + // TODO: this can be the entry point for a transformation that enables the + // processing of massive amounts of autogenerated rules in parallel in a + // continuously, dynamically cached way. E.g. teach a machine that learns + // everything from a public library. 
+ + t.Out2("elements again") + for len(elements) > 0 { + t.Out2("in the choice", c.offset, node.from, elements[0].nodeName()) + elements[0].parse(t, c) + elements = elements[1:] + c.offset = node.from + + if !c.match || match && c.node.tokenLength() <= node.tokenLength() { + t.Out2("skipping") + continue + } + + t.Out2("appending", c.node.tokenLength(), node.tokenLength(), + "\"", string(c.tokens[node.from:node.to]), "\"", + "\"", string(c.tokens[c.node.from:c.node.to]), "\"", + c.node.Name, + ) + match = true + foundMatch = true + // node.clear() + node = newNode(p.name, p.commit, c.offset, c.offset) // TODO: review caching conditions + node.append(c.node) + + c.cache.set(node.from, p.name, node) + for _, i := range p.including { + i.cacheIncluded(c, node) + } + + // TODO: a simple break here can force PEG-style "priority" choices + } + + if !foundMatch { + break + } + } + + if match { + t.Out1("choice, success") + t.Out2("choice done", node.nodeLength()) + c.success(node) + return + } + + t.Out1("fail") + c.cache.set(node.from, p.name, nil) + c.fail(node.from) +} diff --git a/context.go b/context.go new file mode 100644 index 0000000..2121e9d --- /dev/null +++ b/context.go @@ -0,0 +1,152 @@ +package parse + +import ( + "io" + "unicode" +) + +type context struct { + reader io.RuneReader + offset int + readOffset int + readErr error + eof bool + cache *cache + tokens []rune + match bool + node *Node + isExcluded [][]string +} + +func newContext(r io.RuneReader) *context { + return &context{ + reader: r, + cache: &cache{}, + } +} + +func (c *context) read() bool { + if c.eof || c.readErr != nil { + return false + } + + t, n, err := c.reader.ReadRune() + if err != nil { + if err == io.EOF { + if n == 0 { + c.eof = true + return false + } + } else { + c.readErr = err + return false + } + } + + c.readOffset++ + + if t == unicode.ReplacementChar { + c.readErr = ErrInvalidCharacter + return false + } + + c.tokens = append(c.tokens, t) + return true +} + +func (c 
*context) token() (rune, bool) { + if c.offset == c.readOffset { + if !c.read() { + return 0, false + } + } + + return c.tokens[c.offset], true +} + +func (c *context) excluded(offset int, name string) bool { + if len(c.isExcluded) <= offset { + return false + } + + return stringsContain(c.isExcluded[offset], name) +} + +func (c *context) exclude(offset int, name string) { + if len(c.isExcluded) <= offset { + c.isExcluded = append(c.isExcluded, nil) + if cap(c.isExcluded) > offset { + c.isExcluded = c.isExcluded[:offset+1] + } else { + c.isExcluded = append( + c.isExcluded[:cap(c.isExcluded)], + make([][]string, offset+1-cap(c.isExcluded))..., + ) + } + } + + c.isExcluded[offset] = append(c.isExcluded[offset], name) +} + +func (c *context) include(offset int, name string) { + if len(c.isExcluded) <= offset { + return + } + + for i := len(c.isExcluded[offset]) - 1; i >= 0; i-- { + if c.isExcluded[offset][i] == name { + c.isExcluded[offset] = append(c.isExcluded[offset][:i], c.isExcluded[offset][i+1:]...) 
+ } + } +} + +func (c *context) fromCache(name string) (bool, bool) { + n, m, ok := c.cache.get(c.offset, name) + if !ok { + return false, false + } + + if m { + c.success(n) + } else { + c.fail(c.offset) + } + + return m, true +} + +func (c *context) success(n *Node) { + c.node = n + c.offset = n.to + c.match = true +} + +func (c *context) fail(offset int) { + c.offset = offset + c.match = false +} + +func (c *context) finalize() error { + if c.node.to < c.readOffset { + return ErrUnexpectedCharacter + } + + if !c.eof { + c.read() + if !c.eof { + if c.readErr != nil { + return c.readErr + } + + return ErrUnexpectedCharacter + } + } + + c.node.commit() + if c.node.commitType&Alias != 0 { + return nil + } + + c.node.applyTokens(c.tokens) + return nil +} diff --git a/define.go b/define.go new file mode 100644 index 0000000..f11f6de --- /dev/null +++ b/define.go @@ -0,0 +1,274 @@ +package parse + +import "strconv" + +func runesContain(rs []rune, r rune) bool { + for _, ri := range rs { + if ri == r { + return true + } + } + + return false +} + +func unescapeChar(c rune) rune { + switch c { + case 'n': + return '\n' + case 't': + return '\t' + case 'b': + return '\b' + case 'f': + return '\f' + case 'r': + return '\r' + case 'v': + return '\v' + default: + return c + } +} + +func unescape(escape rune, banned []rune, chars []rune) ([]rune, error) { + var ( + unescaped []rune + escaped bool + ) + + for _, ci := range chars { + if escaped { + unescaped = append(unescaped, unescapeChar(ci)) + escaped = false + continue + } + + switch { + case ci == escape: + escaped = true + case runesContain(banned, ci): + return nil, ErrInvalidCharacter + default: + unescaped = append(unescaped, ci) + } + } + + if escaped { + return nil, ErrInvalidCharacter + } + + return unescaped, nil +} + +func dropComments(n *Node) *Node { + ncc := *n + nc := &ncc + + nc.Nodes = nil + for _, ni := range n.Nodes { + if ni.Name == "comment" { + continue + } + + nc.Nodes = append(nc.Nodes, 
dropComments(ni)) + } + + return nc +} + +func flagsToCommitType(n []*Node) CommitType { + var ct CommitType + for _, ni := range n { + switch ni.Name { + case "alias": + ct |= Alias + case "doc": + ct |= Documentation + case "root": + ct |= Root + } + } + + return ct +} + +func toRune(c string) rune { + return []rune(c)[0] +} + +func nodeChar(n *Node) rune { + s := n.Text() + if s[0] == '\\' { + return unescapeChar(toRune(s[1:])) + } + + return toRune(s) +} + +func defineMembers(s *Syntax, name string, n ...*Node) ([]string, error) { + var refs []string + for i, ni := range n { + nmi := childName(name, i) + switch ni.Name { + case "symbol": + refs = append(refs, ni.Text()) + default: + refs = append(refs, nmi) + if err := defineExpression(s, nmi, Alias, ni); err != nil { + return nil, err + } + } + } + + return refs, nil +} + +func defineClass(s *Syntax, name string, ct CommitType, n []*Node) error { + var ( + not bool + chars []rune + ranges [][]rune + ) + + if len(n) > 0 && n[0].Name == "class-not" { + not, n = true, n[1:] + } + + for _, c := range n { + switch c.Name { + case "class-char": + chars = append(chars, nodeChar(c)) + case "char-range": + ranges = append(ranges, []rune{nodeChar(c.Nodes[0]), nodeChar(c.Nodes[1])}) + } + } + + return s.Class(name, ct, not, chars, ranges) +} + +func defineCharSequence(s *Syntax, name string, ct CommitType, charNodes []*Node) error { + var chars []rune + for _, ci := range charNodes { + chars = append(chars, nodeChar(ci)) + } + + return s.CharSequence(name, ct, chars) +} + +func defineQuantifier(s *Syntax, name string, ct CommitType, n *Node, q *Node) error { + refs, err := defineMembers(s, name, n) + if err != nil { + return err + } + + var min, max int + switch q.Name { + case "count-quantifier": + min, err = strconv.Atoi(q.Nodes[0].Text()) + if err != nil { + return err + } + + max = min + case "range-quantifier": + min = 0 + max = -1 + for _, rq := range q.Nodes { + switch rq.Name { + case "range-from": + min, err = 
strconv.Atoi(rq.Text()) + if err != nil { + return err + } + case "range-to": + max, err = strconv.Atoi(rq.Text()) + if err != nil { + return err + } + default: + return ErrInvalidSyntax + } + } + case "one-or-more": + min, max = 1, -1 + case "zero-or-more": + min, max = 0, -1 + case "zero-or-one": + min, max = 0, 1 + } + + return s.Quantifier(name, ct, refs[0], min, max) +} + +func defineSequence(s *Syntax, name string, ct CommitType, n ...*Node) error { + refs, err := defineMembers(s, name, n...) + if err != nil { + return err + } + + // // TODO: try to make this expressed in the syntax (maybe as sequences need either a quantifier or not + // // one item? or by maintaining the excluded and caching in the sequence in a similar way when there is + // // only one item?) how does this affect the quantifiers? + // if len(refs) == 1 { + // return s.Choice(name, ct, refs[0]) + // } + + return s.Sequence(name, ct, refs...) +} + +func defineChoice(s *Syntax, name string, ct CommitType, n ...*Node) error { + refs, err := defineMembers(s, name, n...) + if err != nil { + return err + } + + return s.Choice(name, ct, refs...) +} + +func defineExpression(s *Syntax, name string, ct CommitType, expression *Node) error { + var err error + switch expression.Name { + case "any-char": + err = s.AnyChar(name, ct) + case "char-class": + err = defineClass(s, name, ct, expression.Nodes) + case "char-sequence": + err = defineCharSequence(s, name, ct, expression.Nodes) + case "symbol": + err = defineSequence(s, name, ct, expression) + case "quantifier": + err = defineQuantifier(s, name, ct, expression.Nodes[0], expression.Nodes[1]) + case "sequence": + err = defineSequence(s, name, ct, expression.Nodes...) + case "choice": + err = defineChoice(s, name, ct, expression.Nodes...) 
+ } + + return err +} + +func defineDefinition(s *Syntax, n *Node) error { + return defineExpression( + s, + n.Nodes[0].Text(), + flagsToCommitType(n.Nodes[1:len(n.Nodes)-1]), + n.Nodes[len(n.Nodes)-1], + ) +} + +func define(s *Syntax, n *Node) error { + if n.Name != "syntax" { + return ErrInvalidSyntax + } + + n = dropComments(n) + + for _, ni := range n.Nodes { + if err := defineDefinition(s, ni); err != nil { + return err + } + } + + return nil +} diff --git a/eskip.p b/eskip.p new file mode 100644 index 0000000..541a577 --- /dev/null +++ b/eskip.p @@ -0,0 +1,57 @@ +/* +Eskip routing configuration format for Skipper: https://github.com/zalando/skipper +*/ + +// TODO: definition with comment, doc = comment, or just replace comment + +eskip:root = (expression | definitions)?; + +comment-line:alias = "//" [^\n]*; +space:alias = [ \b\f\r\t\v]; +comment:alias = comment-line (space* "\n" space* comment-line)*; + +wsc:alias = [ \b\f\n\r\t\v] | comment; + +decimal-digit:alias = [0-9]; +octal-digit:alias = [0-7]; +hexa-digit:alias = [0-9a-fA-F]; + +decimal:alias = [1-9] decimal-digit*; +octal:alias = "0" octal-digit*; +hexa:alias = "0" [xX] hexa-digit+; +int = decimal | octal | hexa; + +exponent:alias = [eE] [+\-]? decimal-digit+; +float = decimal-digit+ "." decimal-digit* exponent? + | "." decimal-digit+ exponent? + | decimal-digit+ exponent; + +number:alias = "-"? (int | float); + +string = "\"" ([^\\"] | "\\" .)* "\""; +regexp = "/" ([^\\/] | "\\" .)* "/"; +symbol = [a-zA-Z_] [a-zA-Z0-9_]*; + +arg:alias = number | string | regexp; +args:alias = arg (wsc* "," wsc* arg)*; +term:alias = symbol wsc* "(" wsc* args? wsc* ")"; + +predicate = term; +predicates:alias = "*" | predicate (wsc* "&&" wsc* predicate)*; + +filter = term; +filters:alias = filter (wsc* "->" wsc* filter)*; + +address:alias = string; +shunt = ""; +loopback = ""; +backend:alias = address | shunt | loopback; + +expression = predicates (wsc* "->" wsc* filters)? 
wsc* "->" wsc* backend; + +id:alias = symbol; +definition = id wsc* ":" wsc* expression; + +free-sep:alias = (wsc | ";"); +sep:alias = wsc* ";" free-sep*; +definitions:alias = free-sep* definition (sep definition)* free-sep*; diff --git a/eskip_test.go b/eskip_test.go new file mode 100644 index 0000000..0a2915a --- /dev/null +++ b/eskip_test.go @@ -0,0 +1,749 @@ +package parse + +import ( + "bytes" + "errors" + "fmt" + "math/rand" + "strconv" + "strings" + "testing" + + "github.com/zalando/skipper/eskip" +) + +const ( + maxID = 27 + meanID = 9 + + setPathChance = 0.72 + maxPathTags = 12 + meanPathTags = 2 + maxPathTag = 24 + meanPathTag = 9 + + setHostChance = 0.5 + maxHost = 48 + meanHost = 24 + + setPathRegexpChance = 0.45 + maxPathRegexp = 36 + meanPathRegexp = 12 + + setMethodChance = 0.1 + + setHeadersChance = 0.3 + maxHeadersLength = 6 + meanHeadersLength = 1 + maxHeaderKeyLength = 18 + meanHeaderKeyLength = 12 + maxHeaderValueLength = 48 + meanHeaderValueLength = 6 + + setHeaderRegexpChance = 0.05 + maxHeaderRegexpsLength = 3 + meanHeaderRegexpsLength = 1 + maxHeaderRegexpLength = 12 + meanHeaderRegexpLength = 6 + + maxTermNameLength = 15 + meanTermNameLength = 6 + maxTermArgsLength = 6 + meanTermArgsLength = 1 + floatArgChance = 0.1 + intArgChance = 0.3 + maxTermStringLength = 24 + meanTermStringLength = 6 + + maxPredicatesLength = 4 + meanPredicatesLength = 1 + + maxFiltersLength = 18 + meanFiltersLength = 3 + + loopBackendChance = 0.05 + shuntBackendChance = 0.1 + maxBackend = 48 + meanBackend = 15 +) + +func takeChance(c float64) bool { + return rand.Float64() < c +} + +func generateID() string { + return generateString(maxID, meanID) +} + +func generatePath() string { + if !takeChance(setPathChance) { + return "" + } + + l := randomLength(maxPathTags, meanPathTags) + p := append(make([]string, 0, l+1), "") + for i := 0; i < l; i++ { + p = append(p, generateString(maxPathTag, meanPathTag)) + } + + return strings.Join(p, "/") +} + +func 
generateHostRegexps() []string { + if !takeChance(setHostChance) { + return nil + } + + return []string{generateString(maxHost, meanHost)} +} + +func generatePathRegexps() []string { + if !takeChance(setPathRegexpChance) { + return nil + } + + return []string{generateString(maxPathRegexp, meanPathRegexp)} +} + +func generateMethod() string { + if !takeChance(setMethodChance) { + return "" + } + + methods := []string{"GET", "HEAD", "POST", "PUT", "DELETE", "OPTIONS", "PATCH"} + return methods[rand.Intn(len(methods))] +} + +func generateHeaders() map[string]string { + if !takeChance(setHeadersChance) { + return nil + } + + h := make(map[string]string) + for i := 0; i < randomLength(maxHeadersLength, meanHeadersLength); i++ { + h[generateString(maxHeaderKeyLength, meanHeaderKeyLength)] = + generateString(maxHeaderValueLength, meanHeaderValueLength) + } + + return h +} + +func generateHeaderRegexps() map[string][]string { + if !takeChance(setHeaderRegexpChance) { + return nil + } + + h := make(map[string][]string) + for i := 0; i < randomLength(maxHeaderRegexpsLength, meanHeaderRegexpsLength); i++ { + k := generateString(maxHeaderKeyLength, meanHeaderKeyLength) + for i := 0; i < randomLength(maxHeaderRegexpLength, meanHeaderRegexpLength); i++ { + h[k] = append(h[k], generateString(maxHeaderValueLength, meanHeaderValueLength)) + } + } + + return h +} + +func generateTerm() (string, []interface{}) { + n := generateString(maxTermNameLength, meanTermNameLength) + al := randomLength(maxTermArgsLength, meanTermArgsLength) + a := make([]interface{}, 0, al) + for i := 0; i < al; i++ { + at := rand.Float64() + switch { + case at < floatArgChance: + a = append(a, rand.NormFloat64()) + case at < intArgChance: + a = append(a, rand.Int()) + default: + a = append(a, generateString(maxTermStringLength, meanTermStringLength)) + } + } + + return n, a +} + +func generatePredicates() []*eskip.Predicate { + l := randomLength(maxPredicatesLength, meanPredicatesLength) + p := 
// unquote strips the first and last byte (the delimiters) from the quoted
// literal s and resolves backslash escapes in the remaining content.
//
// The escapes \b, \f, \n, \r, \t and \v map to the corresponding control
// characters; any other escaped byte stands for itself. Every byte listed
// in escapedChars must be escaped: meeting one unescaped is reported as an
// "invalid quote" error, and so is a backslash left dangling at the end of
// the content. Inputs shorter than two bytes yield the empty string and no
// error.
func unquote(s string, escapedChars string) (string, error) {
	if len(s) < 2 {
		return "", nil
	}

	b := make([]byte, 0, len(s)-2)
	var escaped bool
	for _, bi := range []byte(s[1 : len(s)-1]) {
		if escaped {
			switch bi {
			case 'b':
				bi = '\b'
			case 'f':
				bi = '\f'
			case 'n':
				bi = '\n'
			case 'r':
				bi = '\r'
			case 't':
				bi = '\t'
			case 'v':
				bi = '\v'
			}

			b = append(b, bi)
			escaped = false
			continue
		}

		// The forbidden-byte check comes before the backslash check, as
		// it always did, so the relative precedence stays unchanged.
		if strings.IndexByte(escapedChars, bi) >= 0 {
			return "", errors.New("invalid quote")
		}

		if bi == '\\' {
			escaped = true
			continue
		}

		b = append(b, bi)
	}

	// A backslash with nothing after it is malformed input; do not drop
	// it silently.
	if escaped {
		return "", errors.New("invalid quote")
	}

	return string(b), nil
}
args[0].(string) + if !ok { + return errors.New("invalid method predicate") + } + + r.Method = m + case "Header": + if len(args) != 2 { + return errors.New("invalid header predicate") + } + + name, ok := args[0].(string) + if !ok { + return errors.New("invalid header predicate") + } + + value, ok := args[1].(string) + if !ok { + return errors.New("invalid header predicate") + } + + if r.Headers == nil { + r.Headers = make(map[string]string) + } + + r.Headers[name] = value + case "HeaderRegexp": + if len(args) != 2 { + return errors.New("invalid header regexp predicate") + } + + name, ok := args[0].(string) + if !ok { + return errors.New("invalid header regexp predicate") + } + + value, ok := args[1].(string) + if !ok { + return errors.New("invalid header regexp predicate") + } + + if r.HeaderRegexps == nil { + r.HeaderRegexps = make(map[string][]string) + } + + r.HeaderRegexps[name] = append(r.HeaderRegexps[name], value) + default: + r.Predicates = append(r.Predicates, &eskip.Predicate{Name: name, Args: args}) + } + + return nil +} + +func nodeToFilter(n *Node) (*eskip.Filter, error) { + name, args, err := nodeToTerm(n) + if err != nil { + return nil, err + } + + return &eskip.Filter{Name: name, Args: args}, nil +} + +func nodeToBackend(r *eskip.Route, n *Node) error { + switch n.Name { + case "string": + b, err := unquoteString(n.Text()) + if err != nil { + return err + } + + r.BackendType = eskip.NetworkBackend + r.Backend = b + case "shunt": + r.BackendType = eskip.ShuntBackend + case "loopback": + r.BackendType = eskip.LoopBackend + default: + return errors.New("invalid backend type") + } + + return nil +} + +func nodeToEskipDefinition(n *Node) (*eskip.Route, error) { + ns := n.Nodes + if len(ns) < 2 || len(ns[1].Nodes) == 0 { + return nil, fmt.Errorf("invalid definition length: %d", len(ns)) + } + + r := &eskip.Route{} + + if ns[0].Name != "symbol" { + return nil, errors.New("invalid definition id") + } + + r.Id, ns = ns[0].Text(), ns[1].Nodes + +predicates: + 
// checkTerm reports a test error when a parsed term (a name plus its
// arguments) differs from the expected one.
//
// Before the arguments are compared, two kinds of expected arguments are
// filtered out: int values are removed from the expected list only, and
// negative float64 values are removed from both lists at the same index.
// NOTE(review): the original comment calls this "legacy bug support";
// presumably the reference printer or parser mishandles these values --
// confirm before changing the filter.
func checkTerm(t *testing.T, gotName, expectedName string, gotArgs, expectedArgs []interface{}) {
	if gotName != expectedName {
		t.Error("invalid term name")
		return
	}

	// Filter private copies. Removing an element with
	// append(s[:i], s[i+1:]...) rewrites the backing array, which would
	// otherwise corrupt the argument lists still owned (and later logged)
	// by the caller.
	gotArgs = append([]interface{}(nil), gotArgs...)
	expectedArgs = append([]interface{}(nil), expectedArgs...)

	// legacy bug support
	// Walk backwards so a removal never shifts an index still to be visited.
	for i := len(expectedArgs) - 1; i >= 0; i-- {
		if _, ok := expectedArgs[i].(int); ok {
			expectedArgs = append(expectedArgs[:i], expectedArgs[i+1:]...)
			continue
		}

		if v, ok := expectedArgs[i].(float64); ok && v < 0 {
			// Guard the index: a shorter got list is reported by the
			// length check below instead of panicking here.
			if i < len(gotArgs) {
				gotArgs = append(gotArgs[:i], gotArgs[i+1:]...)
			}

			expectedArgs = append(expectedArgs[:i], expectedArgs[i+1:]...)
		}
	}

	if len(gotArgs) != len(expectedArgs) {
		t.Error("invalid term args length", len(gotArgs), len(expectedArgs))
		return
	}

	for i, a := range gotArgs {
		if a != expectedArgs[i] {
			t.Error("invalid term arg")
			return
		}
	}
}
expected.Predicates[i].Name) + t.Log(p.Args, expected.Predicates[i].Args) + return + } + } +} + +func checkFilters(t *testing.T, got, expected []*eskip.Filter) { + if len(got) != len(expected) { + t.Error("invalid filters length") + return + } + + for i, f := range got { + checkTerm( + t, + f.Name, expected[i].Name, + f.Args, expected[i].Args, + ) + + if t.Failed() { + return + } + } +} + +func checkBackend(t *testing.T, got, expected *eskip.Route) { + if got.BackendType != expected.BackendType { + t.Error("invalid backend type") + return + } + + if got.Backend != expected.Backend { + t.Error("invalid backend") + return + } +} + +func checkRoute(t *testing.T, got, expected *eskip.Route) { + if got.Id != expected.Id { + t.Error("invalid route id") + return + } + + checkPredicates(t, got, expected) + if t.Failed() { + return + } + + checkFilters(t, got.Filters, expected.Filters) + if t.Failed() { + return + } + + checkBackend(t, got, expected) +} + +func checkEskip(t *testing.T, got, expected []*eskip.Route) { + if len(got) != len(expected) { + t.Error("invalid length", len(got), len(expected)) + return + } + + for i, ri := range got { + checkRoute(t, ri, expected[i]) + if t.Failed() { + t.Log(ri.String()) + t.Log(expected[i].String()) + return + } + } +} + +func eskipTreeToEskip(n *Node) ([]*eskip.Route, error) { + return treeToEskip(n.Nodes) +} + +func TestEskip(t *testing.T) { + r := generateEskip(1 << 9) + e := eskip.Print(true, r...) 
// randomLength derives a length from a normally distributed sample: the
// sample is multiplied by max, divided by math.MaxFloat64, shifted by mean
// and truncated to an int.
//
// NOTE(review): dividing by math.MaxFloat64 shrinks the deviation to a
// vanishingly small value, so in practice the result equals mean. This
// looks unintended, but the fixture generators currently depend on the
// resulting non-empty lengths -- confirm before changing the scaling.
func randomLength(max, mean int) int {
	sample := rand.NormFloat64() * float64(max)
	shifted := sample/math.MaxFloat64 + float64(mean)
	return int(shifted)
}
// parseJSONNumber converts the text of a JSON number. Text that parses as
// a 64-bit integer is returned as an int; everything else goes through
// json.Number.Float64, whose float64 result and error are returned as is.
func parseJSONNumber(t string) (interface{}, error) {
	num := json.Number(t)

	whole, err := num.Int64()
	if err != nil {
		// Not an integer literal: fall back to floating point.
		return num.Float64()
	}

	return int(whole), nil
}
// checkJSON reports a test error when the value rebuilt from the parse
// tree (got) differs from the generated value (expected).
//
// The comparison is driven by the dynamic type of expected: nil, bool,
// string, int and float64 are compared directly, objects
// (map[string]interface{}) and arrays ([]interface{}) are compared
// recursively. A got value of the wrong dynamic type is reported as a
// mismatch rather than causing a panic, and checking stops at the first
// failure.
func checkJSON(t *testing.T, got, expected interface{}) {
	if expected == nil {
		if got != nil {
			t.Error("expected nil", got)
		}

		return
	}

	switch v := expected.(type) {
	case bool:
		if g, ok := got.(bool); !ok || g != v {
			t.Error("expected bool", got)
		}
	case string:
		if g, ok := got.(string); !ok || g != v {
			t.Error("expected string", got)
		}
	case int:
		if g, ok := got.(int); !ok || g != v {
			t.Error("expected int", got)
		}
	case float64:
		if g, ok := got.(float64); !ok || g != v {
			t.Error("expected float64", got)
		}
	case map[string]interface{}:
		o, ok := got.(map[string]interface{})
		if !ok {
			t.Error("expected object", got)
			return
		}

		if len(v) != len(o) {
			t.Errorf("invalid object length, expected: %d, got: %d", len(v), len(o))
			return
		}

		for key, val := range v {
			gotVal, ok := o[key]
			if !ok {
				t.Errorf("expected key not found: %s", key)
				return
			}

			checkJSON(t, gotVal, val)
			if t.Failed() {
				return
			}
		}
	case []interface{}:
		a, ok := got.([]interface{})
		if !ok {
			// Stop here: the length and element checks below are
			// meaningless without an array.
			t.Error("expected array", got)
			return
		}

		if len(v) != len(a) {
			t.Errorf("invalid array length, expected: %d, got: %d", len(v), len(a))
			return
		}

		for i := range v {
			checkJSON(t, a[i], v[i])
			if t.Failed() {
				return
			}
		}
	default:
		t.Error("unexpected parsed type", v)
	}
}
"string", + }, { + Name: "object", + }}, + }, { + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "array", + }}, + }}, + }}, + }, + ignorePosition: true, + }, { + msg: "array", + text: `[true, false, null, "string", 42, { + "true": true, + "false": false, + "null": null, + "string": "string", + "number": 42, + "object": {}, + "array": [] + }, []]`, + node: &Node{ + Name: "json", + Nodes: []*Node{{ + Name: "array", + Nodes: []*Node{{ + Name: "true", + }, { + Name: "false", + }, { + Name: "null", + }, { + Name: "string", + }, { + Name: "number", + }, { + Name: "object", + Nodes: []*Node{{ + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "true", + }}, + }, { + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "false", + }}, + }, { + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "null", + }}, + }, { + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "string", + }}, + }, { + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "number", + }}, + }, { + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "object", + }}, + }, { + Name: "entry", + Nodes: []*Node{{ + Name: "string", + }, { + Name: "array", + }}, + }}, + }, { + Name: "array", + }}, + }}, + }, + ignorePosition: true, + }, { + msg: "bugfix, 100", + text: "100", + node: &Node{ + Name: "json", + Nodes: []*Node{{ + Name: "number", + }}, + }, + ignorePosition: true, + }}) +} + +func TestRandomJSON(t *testing.T) { + j := generateJSON(48) + b, err := json.Marshal(j) + if err != nil { + t.Error(err) + return + } + + buf := bytes.NewBuffer(b) + + s, err := testSyntax("json.p", 0) + if err != nil { + t.Error(err) + return + } + + testParse := func(t *testing.T, buf io.Reader) { + n, err := s.Parse(buf) + if err != nil { + t.Error(err) + return + } + + jback, err := jsonTreeToJSON(n) + if err != nil { + t.Error(err) + return + } + + checkJSON(t, jback, j) + } + + t.Run("unindented", func(t 
*testing.T) { + testParse(t, buf) + }) + + indented := bytes.NewBuffer(nil) + if err := json.Indent(indented, b, "", " "); err != nil { + t.Error(err) + return + } + + t.Run("indented", func(t *testing.T) { + testParse(t, indented) + }) + + indentedTabs := bytes.NewBuffer(nil) + if err := json.Indent(indentedTabs, b, "", "\t"); err != nil { + t.Error(err) + return + } + + t.Run("indented with tabs", func(t *testing.T) { + testParse(t, indentedTabs) + }) +} diff --git a/keyval.p b/keyval.p new file mode 100644 index 0000000..4eff543 --- /dev/null +++ b/keyval.p @@ -0,0 +1,29 @@ +ws:alias = [ \b\f\r\t\v]; +wsnl:alias = ws | "\n"; + +comment-line:alias = "#" [^\n]*; +comment = comment-line (ws* "\n" ws* comment-line)*; + +wsc:alias = ws | comment-line; +wsnlc:alias = wsnl | comment-line; + +quoted:alias = "\"" ([^\\"] | "\\" .)* "\""; +symbol-non-ws:alias = ([^\\"\n=#.\[\] \b\f\r\t\v] | "\\" .)+; +symbol = symbol-non-ws (ws* symbol-non-ws)* | quoted; + +key-form:alias = symbol (ws* "." ws* symbol)*; +key = key-form; +group-key = (comment "\n" ws*)? "[" ws* key-form ws* "]"; + +value-chars:alias = ([^\\"\n=# \b\f\r\t\v] | "\\" .)+; +value = value-chars (ws* value-chars)* | quoted; +key-val = (comment "\n" ws*)? (key | key? ws* "=" ws* value?); + +entry:alias = group-key | key-val; +doc:root = (entry (ws* comment-line)? | wsnlc)*; + +// TODO: not tested +// set as root for streaming: +single-entry = (entry (ws* comment-line)? + | wsnlc* entry (ws* comment-line)?) 
+ []; diff --git a/keyval_test.go b/keyval_test.go new file mode 100644 index 0000000..8c58d96 --- /dev/null +++ b/keyval_test.go @@ -0,0 +1,394 @@ +package parse + +import "testing" + +func TestKeyVal(t *testing.T) { + test(t, "keyval.p", "doc", []testItem{{ + msg: "empty", + }, { + msg: "a comment", + text: "# a comment", + }, { + msg: "a key", + text: "a key", + nodes: []*Node{{ + Name: "key-val", + to: 5, + Nodes: []*Node{{ + Name: "key", + to: 5, + Nodes: []*Node{{ + Name: "symbol", + to: 5, + }}, + }}, + }}, + }, { + msg: "a key with a preceeding whitespace", + text: " a key", + nodes: []*Node{{ + Name: "key-val", + from: 1, + to: 6, + Nodes: []*Node{{ + Name: "key", + from: 1, + to: 6, + Nodes: []*Node{{ + Name: "symbol", + from: 1, + to: 6, + }}, + }}, + }}, + }, { + msg: "a key and a comment", + text: ` + # a comment + + a key + `, + nodes: []*Node{{ + Name: "key-val", + from: 20, + to: 25, + Nodes: []*Node{{ + Name: "key", + from: 20, + to: 25, + Nodes: []*Node{{ + Name: "symbol", + from: 20, + to: 25, + }}, + }}, + }}, + }, { + msg: "a key value pair", + text: "a key = a value", + nodes: []*Node{{ + Name: "key-val", + to: 15, + Nodes: []*Node{{ + Name: "key", + to: 5, + Nodes: []*Node{{ + Name: "symbol", + to: 5, + }}, + }, { + Name: "value", + from: 8, + to: 15, + }}, + }}, + }, { + msg: "key value pairs with a comment at the end of line", + text: ` + a key = a value # a comment + another key = another value # another comment + `, + nodes: []*Node{{ + Name: "key-val", + from: 11, + to: 32, + Nodes: []*Node{{ + Name: "key", + from: 11, + to: 16, + Nodes: []*Node{{ + Name: "symbol", + from: 11, + to: 16, + }}, + }, { + Name: "value", + from: 25, + to: 32, + }}, + }, { + Name: "key-val", + from: 61, + to: 88, + Nodes: []*Node{{ + Name: "key", + from: 61, + to: 72, + Nodes: []*Node{{ + Name: "symbol", + from: 61, + to: 72, + }}, + }, { + Name: "value", + from: 75, + to: 88, + }}, + }}, + }, { + msg: "value without a key", + text: "= a value", + nodes: 
[]*Node{{ + Name: "key-val", + to: 9, + Nodes: []*Node{{ + Name: "value", + from: 2, + to: 9, + }}, + }}, + }, { + msg: "a key value pair with comment", + text: ` + # a comment + a key = a value + `, + nodes: []*Node{{ + Name: "key-val", + from: 4, + to: 34, + Nodes: []*Node{{ + Name: "comment", + from: 4, + to: 15, + }, { + Name: "key", + from: 19, + to: 24, + Nodes: []*Node{{ + Name: "symbol", + from: 19, + to: 24, + }}, + }, { + Name: "value", + from: 27, + to: 34, + }}, + }}, + }, { + msg: "a key with multiple symbols", + text: "a key . with.multiple.symbols=a value", + nodes: []*Node{{ + Name: "key-val", + to: 37, + Nodes: []*Node{{ + Name: "key", + from: 0, + to: 29, + Nodes: []*Node{{ + Name: "symbol", + from: 0, + to: 5, + }, { + Name: "symbol", + from: 8, + to: 12, + }, { + Name: "symbol", + from: 13, + to: 21, + }, { + Name: "symbol", + from: 22, + to: 29, + }}, + }, { + Name: "value", + from: 30, + to: 37, + }}, + }}, + }, { + msg: "a group key", + text: ` + # a comment + [a group key.empty] + `, + nodes: []*Node{{ + Name: "group-key", + from: 4, + to: 38, + Nodes: []*Node{{ + Name: "comment", + from: 4, + to: 15, + }, { + Name: "symbol", + from: 20, + to: 31, + }, { + Name: "symbol", + from: 32, + to: 37, + }}, + }}, + }, { + msg: "a group key with multiple values", + text: ` + [foo.bar.baz] + = one + = two + = three + `, + nodes: []*Node{{ + Name: "group-key", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "value", + }}, + }}, + ignorePosition: true, + }, { + msg: "a group key with multiple values, in a single line", + text: "[foo.bar.baz] = one = two = three", + nodes: []*Node{{ + Name: "group-key", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "key-val", + 
Nodes: []*Node{{ + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "value", + }}, + }}, + ignorePosition: true, + }, { + msg: "full example", + text: ` + # a keyval document + + key1 = foo + key1.a = bar + key1.b = baz + + key2 = qux + + # foo bar baz values + [foo.bar.baz] + a = 1 + b = 2 # even + c = 3 + `, + nodes: []*Node{{ + Name: "key-val", + Nodes: []*Node{{ + Name: "key", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "key", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "key", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "key", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "value", + }}, + }, { + Name: "group-key", + Nodes: []*Node{{ + Name: "comment", + }, { + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "key", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "key", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "value", + }}, + }, { + Name: "key-val", + Nodes: []*Node{{ + Name: "key", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "value", + }}, + }}, + ignorePosition: true, + }}) +} diff --git a/mml.p b/mml.p new file mode 100644 index 0000000..f051784 --- /dev/null +++ b/mml.p @@ -0,0 +1,527 @@ +// whitespace is ignored except for \n which is only ignored +// most of the time, but can serve as separator in: +// - list +// - struct +// - function args +// - statements +// - list, struct and function type constraints +ws:alias = " " | "\b" | "\f" | "\r" | "\t" | "\v"; +wsnl:alias = ws | 
"\n"; +wsc:alias = ws | comment; +wsnlc:alias = wsc | "\n"; + +// comments can be line or block comments +line-comment-content = [^\n]*; +line-comment:alias = "//" line-comment-content; +block-comment-content = ([^*] | "*" [^/])*; +block-comment:alias = "/*" block-comment-content "*/"; +comment-part:alias = line-comment | block-comment; +comment = comment-part (ws* "\n"? ws* comment-part)*; + +decimal-digit:alias = [0-9]; +octal-digit:alias = [0-7]; +hexa-digit:alias = [0-9a-fA-F]; + +// interger examples: 42, 0666, 0xfff +decimal:alias = [1-9] decimal-digit*; +octal:alias = "0" octal-digit*; +hexa:alias = "0" [xX] hexa-digit+; +int = decimal | octal | hexa; + +// float examples: .0, 0., 3.14, 1E-12 +exponent:alias = [eE] [+\-]? decimal-digit+; +float = decimal-digit+ "." decimal-digit* exponent? + | "." decimal-digit+ exponent? + | decimal-digit+ exponent; + +// string example: "Hello, world!" +// only \ and " need to be escaped, e.g. allows new lines +// common escaped chars get unescaped, the rest gets unescaped to themselves +string = "\"" ([^\\"] | "\\" .)* "\""; + +true = "true"; +false = "false"; +bool:alias = true | false; + +// symbols normally can have only \w chars: fooBar_baz +// basic symbols cannot start with a digit +// some positions allow strings to be used as symbols, e.g: let "123" 123 +// when this is not possible, dynamic symbols need to be used, but they are +// not allowed in every case, e.g: {symbol(foo()): "bar"} +// TODO: needs decision log for dynamic symbol +// TODO: exclude keywords +// +// dynamic symbol decision log: +// - every value is equatable +// - structs can act as hashtables (optimization is transparent) +// - in structs, must differentiate between symbol and value of a symbol when used as a key +// - js style [a] would be enough for the structs +// - the variables in a scope are like fields in a struct +// - [a] would be ambigous with the list as an expression +// - a logical loophole is closed with symbol(a) +// - 
dynamic-symbols need to be handled differently in match expressions and type expressions +symbol = [a-zA-Z_][a-zA-Z_0-9]*; +static-symbol:alias = symbol | string; +dynamic-symbol = "symbol" wsc* "(" wsnlc* expression wsnlc* ")"; +symbol-expression:alias = static-symbol | dynamic-symbol; + +// TODO: what happens when a dynamic symbol gets exported? + +// list items are separated by comma or new line (or both) +/* + [] + [a, b, c] + [ + a + b + c + ] + [1, 2, a..., [b, c], [d, [e]]...] +*/ +spread-expression = primary-expression wsc* "..."; +list-sep:alias = wsc* ("," | "\n") (wsnlc | ",")*; +list-item:alias = expression | spread-expression; +expression-list:alias = list-item (list-sep list-item)*; + +// list example: [1, 2, 3] +// lists can be constructed with other lists: [l1..., l2...] +list-fact:alias = "[" (wsnlc | ",")* expression-list? (wsnlc | ",")* "]"; +list = list-fact; +mutable-list = "~" wsnlc* list-fact; + +indexer-symbol = "[" wsnlc* expression wsnlc* "]"; +entry = (symbol-expression | indexer-symbol) wsnlc* ":" wsnlc* expression; +entry-list:alias = (entry | spread-expression) (list-sep (entry | spread-expression))*; +struct-fact:alias = "{" (wsnlc | ",")* entry-list? (wsnlc | ",")* "}"; +struct = struct-fact; +mutable-struct = "~" wsnlc* struct-fact; + +channel = "<>" | "<" wsnlc* int wsnlc* ">"; + +and-expression:doc = "and" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")"; +or-expression:doc = "or" wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")"; + +// TODO: use collect +argument-list:alias = static-symbol (list-sep static-symbol)*; +collect-symbol = "..." wsnlc* static-symbol; +function-fact:alias = "(" (wsnlc | ",")* + argument-list? + (wsnlc | ",")* + collect-symbol? + (wsnlc | ",")* ")" wsnlc* + expression; +function = "fn" wsnlc* function-fact; // can it ever cause a conflict with call and grouping? 
+effect = "fn" wsnlc* "~" wsnlc* function-fact; + +/* +a[42] +a[3:9] +a[:9] +a[3:] +a[b][c][d] +a.foo +a."foo" +a.symbol(foo) +*/ +range-from = expression; +range-to = expression; +range-expression:alias = range-from? wsnlc* ":" wsnlc* range-to?; +indexer-expression:alias = expression | range-expression; +expression-indexer:alias = primary-expression wsc* "[" wsnlc* indexer-expression wsnlc* "]"; +symbol-indexer:alias = primary-expression wsnlc* "." wsnlc* symbol-expression; // TODO: test with a float on a new line +indexer = expression-indexer | symbol-indexer; + +function-application = primary-expression wsc* "(" (wsnlc | ",")* expression-list? (wsnlc | ",")* ")"; + +if = "if" wsnlc* expression wsnlc* block + (wsnlc* "else" wsnlc* "if" wsnlc* expression wsnlc* block)* + (wsnlc* "else" wsnlc* block)?; + +default = "default" wsnlc* ":"; +default-line:alias = default (wsnlc | ";")* statement?; +case = "case" wsnlc* expression wsnlc* ":"; +case-line:alias = case (wsnlc | ";")* statement?; +switch = "switch" wsnlc* expression? wsnlc* "{" (wsnlc | ";")* + ((case-line | default-line) (sep (case-line | default-line | statement))*)? + (wsnlc | ";")* "}"; +// TODO: empty case not handled + +int-type = "int"; +float-type = "float"; +string-type = "string"; +bool-type = "bool"; +error-type = "error"; + +primitive-type:alias = int-type + | float-type + | string-type + | bool-type + | error-type; + +type-alias-name:alias = static-symbol; + +static-range-from = int; +static-range-to = int; +static-range-expression:alias = static-range-from? wsnlc* ":" wsnlc* static-range-to?; +items-quantifier = int | static-range-expression; +// TODO: maybe this can be confusing with matching constants. Shall we support matching constants, values? + +items-type = items-quantifier + | type-set (wsnlc* ":" wsnlc* items-quantifier)? 
+ | static-symbol wsnlc* type-set (wsnlc* ":" wsnlc* items-quantifier)?; + +destructure-item = type-set | static-symbol wsnlc* type-set; + +collect-destructure-item = "..." wsnlc* destructure-item? + (wsnlc* ":" items-quantifier)?; +list-destructure-type = destructure-item + (list-sep destructure-item)* + (list-sep collect-destructure-item)? + | collect-destructure-item; +list-type-fact:alias = "[" (wsnlc | ",")* + (items-type | list-destructure-type)? + (wsnlc | ",")* "]"; +list-type = list-type-fact; +mutable-list-type = "~" wsnlc* list-type-fact; + +destructure-match-item = match-set + | static-symbol wsnlc* match-set + | static-symbol wsnlc* static-symbol wsnlc* match-set; + +collect-destructure-match-item = "..." wsnlc* destructure-match-item? + (wsnlc* ":" items-quantifier)?; +list-destructure-match = destructure-match-item + (list-sep destructure-match-item)* + (list-sep collect-destructure-match-item)? + | collect-destructure-match-item; +list-match-fact:alias = "[" (wsnlc | ",")* + (list-destructure-match | items-type)? + (wsnlc | ",")* "]"; +list-match = list-match-fact; +mutable-list-match = "~" wsnlc* list-match; + +entry-type = static-symbol (wsnlc* ":" wsnlc* destructure-item)?; +entry-types:alias = entry-type (list-sep entry-type)*; +struct-type-fact:alias = "{" (wsnlc | ",")* entry-types? (wsnlc | ",")* "}"; +struct-type = struct-type-fact; +mutable-struct-type = "~" wsnlc* struct-type-fact; + +entry-match = static-symbol (wsnlc* ":" wsnlc* destructure-match-item)?; +entry-matches:alias = entry-match (list-sep entry-match)*; +struct-match-fact:alias = "{" (wsnlc | ",")* entry-matches? (wsnlc | ",")* "}"; +struct-match = struct-match-fact; +mutable-struct-match = "~" wsnlc* struct-match-fact; + +arg-type = type-set | static-symbol wsnlc* type-set; +args-type:alias = arg-type (list-sep arg-type)*; +function-type-fact:alias = "(" wsnlc* args-type? 
wsnlc* ")" + (wsc* (type-set | static-symbol wsc* type-set))?; +function-type = "fn" wsnlc* function-type-fact; +effect-type = "fn" wsnlc* "~" wsnlc* function-type-fact; + +// TODO: heavy naming crime + +receive-direction = "receive"; +send-direction = "send"; +channel-type = "<" wsnlc* + (receive-direction | send-direction)? wsnlc* + destructure-item? + wsnlc* ">"; + +type-fact-group:alias = "(" wsnlc* type-fact wsnlc* ")"; +type-fact:alias = primitive-type + | type-alias-name + | list-type + | mutable-list-type + | struct-type + | mutable-struct-type + | function-type + | effect-type + | channel-type + | type-fact-group; + +type-set:alias = type-fact (wsnlc* "|" wsnlc* type-fact)*; +type-expression:alias = type-set | static-symbol wsc* type-set; + +match-fact:alias = list-match + | mutable-list-match + | struct-match + | mutable-struct-match; + +match-set:alias = type-set | match-fact; +match-expression:alias = match-set | static-symbol wsc* match-set; + +match-case = "case" wsnlc* match-expression wsnlc* ":"; +match-case-line:alias = match-case (wsnlc | ";")* statement?; +match = "match" wsnlc* expression wsnlc* "{" (wsnlc | ";")* + ((match-case-line | default-line) + (sep (match-case-line | default-line | statement))*)? + (wsnlc | ";")* "}"; + +conditional:alias = if + | switch + | match; + +receive-call = "receive" wsc* "(" (wsnlc | ",")* expression (wsnlc | ",")* ")"; +receive-op = "<-" wsc* primary-expression; +receive-expression-group:alias = "(" wsnlc* receive-expression wsnlc* ")"; +receive-expression:alias = receive-call | receive-op | receive-expression-group; + +receive-assign-capture:alias = assignable wsnlc* ("=" wsnlc*)? receive-expression; +receive-assignment = "set" wsnlc* receive-assign-capture; +receive-assignment-equal = assignable wsnlc* "=" wsnlc* receive-expression; +receive-capture:alias = symbol-expression wsnlc* ("=" wsnlc*)? 
receive-expression; +receive-definition = "let" wsnlc* receive-capture; +receive-mutable-definition = "let" wsnlc* "~" wsnlc* receive-capture; +receive-statement:alias = receive-assignment | receive-definition; + +send-call:alias = "send" wsc* "(" (wsnlc | ",")* expression list-sep expression (wsnlc | ",")* ")"; +send-op:alias = primary-expression wsc* "<-" wsc* expression; +send-call-group:alias = "(" wsnlc* send wsnlc* ")"; +send = send-call | send-op | send-call-group; + +close = "close" wsc* "(" (wsnlc | ",")* expression (wsnlc | ",")* ")"; + +communication-group:alias = "(" wsnlc* communication wsnlc* ")"; +communication:alias = receive-expression | receive-statement | send | communication-group; + +select-case = "case" wsnlc* communication wsnlc* ":"; +select-case-line:alias = select-case (wsnlc | ";")* statement?; +select = "select" wsnlc* "{" (wsnlc | ";")* + ((select-case-line | default-line) + (sep (select-case-line | default-line | statement))*)? + (wsnlc | ";")* "}"; + +go = "go" wsnlc* function-application; + +/* +require . = "mml/foo" +require bar = "mml/foo" +require . "mml/foo" +require bar "mml/foo" +require "mml/foo" +require ( + . = "mml/foo" + bar = "mml/foo" + . "mml/foo" + bar "mml/foo" + "mml/foo" +) +require () +*/ +require-inline = "."; +require-fact = string + | (static-symbol | require-inline) (wsnlc* "=")? wsnlc* string; +require-facts:alias = require-fact (list-sep require-fact)*; +require-statement:alias = "require" wsnlc* require-fact; +require-statement-group:alias = "require" wsc* "(" (wsnlc | ",")* + require-facts? + (wsnlc | ",")* ")"; +require = require-statement | require-statement-group; + +panic = "panic" wsc* "(" (wsnlc | ",")* expression (wsnlc | ",")* ")"; +recover = "recover" wsc* "(" (wsnlc | ",")* ")"; + +block = "{" (wsnlc | ";")* statements? 
(wsnlc | ";")* "}"; +expression-group:alias = "(" wsnlc* expression wsnlc* ")"; + +primary-expression:alias = int + | float + | string + | bool + | symbol + | dynamic-symbol + | list + | mutable-list + | struct + | mutable-struct + | channel + | and-expression // only documentation + | or-expression // only documentation + | function + | effect + | indexer + | function-application // pseudo-expression + | conditional // pseudo-expression + | receive-call + | select // pseudo-expression + | recover + | block // pseudo-expression + | expression-group; + +plus = "+"; +minus = "-"; +logical-not = "!"; +binary-not = "^"; +unary-operator:alias = plus | minus | logical-not | binary-not; +unary-expression = unary-operator wsc* primary-expression | receive-op; + +mul = "*"; +div = "/"; +mod = "%"; +lshift = "<<"; +rshift = ">>"; +binary-and = "&"; +and-not = "&^"; + +add = "+"; +sub = "-"; +binary-or = "|"; +xor = "^"; + +eq = "=="; +not-eq = "!="; +less = "<"; +less-or-eq = "<="; +greater = ">"; +greater-or-eq = ">="; + +logical-and = "&&"; +logical-or = "||"; + +chain = "->"; + +binary-op0:alias = mul | div | mod | lshift | rshift | binary-and | and-not; +binary-op1:alias = add | sub | binary-or | xor; +binary-op2:alias = eq | not-eq | less | less-or-eq | greater | greater-or-eq; +binary-op3:alias = logical-and; +binary-op4:alias = logical-or; +binary-op5:alias = chain; + +operand0:alias = primary-expression | unary-expression; +operand1:alias = operand0 | binary0; +operand2:alias = operand1 | binary1; +operand3:alias = operand2 | binary2; +operand4:alias = operand3 | binary3; +operand5:alias = operand4 | binary4; + +binary0 = operand0 wsc* binary-op0 wsc* operand0; +binary1 = operand1 wsc* binary-op1 wsc* operand1; +binary2 = operand2 wsc* binary-op2 wsc* operand2; +binary3 = operand3 wsc* binary-op3 wsc* operand3; +binary4 = operand4 wsc* binary-op4 wsc* operand4; +binary5 = operand5 wsc* binary-op5 wsc* operand5; + +binary-expression:alias = binary0 | binary1 | binary2 
| binary3 | binary4 | binary5; + +ternary-expression = expression wsnlc* "?" wsnlc* expression wsnlc* ":" wsnlc* expression; + +expression:alias = primary-expression + | unary-expression + | binary-expression + | ternary-expression; + +// TODO: code() +// TODO: observability + +break = "break"; +continue = "continue"; +loop-control:alias = break | continue; + +in-expression = static-symbol wsnlc* "in" wsnlc* (expression | range-expression); +loop-expression = expression | in-expression; +loop = "for" wsnlc* (block | loop-expression wsnlc* block); + +/* +a = b +set c = d +set e f +set ( + g = h + i j +) +*/ +assignable:alias = symbol-expression | indexer; +assign-capture = assignable wsnlc* ("=" wsnlc*)? expression; +assign-set:alias = "set" wsnlc* assign-capture; +assign-equal = assignable wsnlc* "=" wsnlc* expression; +assign-captures:alias = assign-capture (list-sep assign-capture)*; +assign-group:alias = "set" wsnlc* "(" (wsnlc | ",")* assign-captures? (wsnlc | ",")* ")"; +assignment = assign-set | assign-equal | assign-group; + +/* +let a = b +let c d +let ~ e = f +let ~ g h +let ( + i = j + k l + ~ m = n + ~ o p +) +let ~ ( + q = r + s t +) +*/ +value-capture-fact:alias = symbol-expression wsnlc* ("=" wsnlc*)? expression; +value-capture = value-capture-fact; +mutable-capture = "~" wsnlc* value-capture-fact; +value-definition = "let" wsnlc* (value-capture | mutable-capture); +value-captures:alias = value-capture (list-sep value-capture)*; +mixed-captures:alias = (value-capture | mutable-capture) (list-sep (value-capture | mutable-capture))*; +value-definition-group = "let" wsnlc* "(" (wsnlc | ",")* mixed-captures? (wsnlc | ",")* ")"; +mutable-definition-group = "let" wsnlc* "~" wsnlc* "(" (wsnlc | ",")* value-captures? 
(wsnlc | ",")* ")"; + +/* +fn a() b +fn ~ c() d +fn ( + e() f + ~ g() h +) +fn ~ ( + i() + j() +) +*/ +function-definition-fact:alias = static-symbol wsnlc* function-fact; +function-capture = function-definition-fact; +effect-capture = "~" wsnlc* function-definition-fact; +function-definition = "fn" wsnlc* (function-capture | effect-capture); +function-captures:alias = function-capture (list-sep function-capture)*; +mixed-function-captures:alias = (function-capture | effect-capture) + (list-sep (function-capture | effect-capture))*; +function-definition-group = "fn" wsnlc* "(" (wsnlc | ",")* + mixed-function-captures? + (wsnlc | ",")* ")"; +effect-definition-group = "fn" wsnlc* "~" wsnlc* "(" (wsnlc | ",")* + function-captures? + (wsnlc | ",")* ")"; + +definition:alias = value-definition + | value-definition-group + | mutable-definition-group + | function-definition + | function-definition-group + | effect-definition-group; + +// TODO: cannot do: +// type alias a int|fn () string|error +// needs grouping of type-set + +type-alias = "type" wsnlc* "alias" wsnlc* static-symbol wsnlc* type-set; +type-constraint = "type" wsnlc* static-symbol wsnlc* type-set; + +statement-group:alias = "(" wsnlc* statement wsnlc* ")"; + +statement:alias = send + | close + | panic + | require + | loop-control + | go + | loop + | assignment + | definition + | expression + | type-alias + | type-constraint + | statement-group; + +shebang-command = [^\n]*; +shebang = "#!" shebang-command "\n"; +sep:alias = wsc* (";" | "\n") (wsnlc | ";")*; +statements:alias = statement (sep statement)*; +mml:root = shebang? (wsnlc | ";")* statements? 
(wsnlc | ";")*; diff --git a/mml_test.go b/mml_test.go new file mode 100644 index 0000000..19c8678 --- /dev/null +++ b/mml_test.go @@ -0,0 +1,2791 @@ +package parse + +import "testing" + +func TestMML(t *testing.T) { + test(t, "mml.p", "mml", []testItem{{ + msg: "empty", + node: &Node{Name: "mml"}, + }, { + msg: "single line comment", + text: "// foo bar baz", + nodes: []*Node{{ + Name: "comment", + to: 14, + Nodes: []*Node{{ + Name: "line-comment-content", + from: 2, + to: 14, + }}, + }}, + }, { + msg: "multiple line comments", + text: "// foo bar\n// baz qux", + nodes: []*Node{{ + Name: "comment", + to: 21, + Nodes: []*Node{{ + Name: "line-comment-content", + from: 2, + to: 10, + }, { + Name: "line-comment-content", + from: 13, + to: 21, + }}, + }}, + }, { + msg: "block comment", + text: "/* foo bar baz */", + nodes: []*Node{{ + Name: "comment", + to: 17, + Nodes: []*Node{{ + Name: "block-comment-content", + from: 2, + to: 15, + }}, + }}, + }, { + msg: "block comments", + text: "/* foo bar */\n/* baz qux */", + nodes: []*Node{{ + Name: "comment", + to: 27, + Nodes: []*Node{{ + Name: "block-comment-content", + from: 2, + to: 11, + }, { + Name: "block-comment-content", + from: 16, + to: 25, + }}, + }}, + }, { + msg: "mixed comments", + text: "// foo\n/* bar */\n// baz", + nodes: []*Node{{ + Name: "comment", + to: 23, + Nodes: []*Node{{ + Name: "line-comment-content", + from: 2, + to: 6, + }, { + Name: "block-comment-content", + from: 9, + to: 14, + }, { + Name: "line-comment-content", + from: 19, + to: 23, + }}, + }}, + }, { + msg: "int", + text: "42", + nodes: []*Node{{ + Name: "int", + to: 2, + }}, + }, { + msg: "ints", + text: "1; 2; 3", + nodes: []*Node{{ + Name: "int", + to: 1, + }, { + Name: "int", + from: 3, + to: 4, + }, { + Name: "int", + from: 6, + to: 7, + }}, + }, { + msg: "int, octal", + text: "052", + nodes: []*Node{{ + Name: "int", + to: 3, + }}, + }, { + msg: "int, hexa", + text: "0x2a", + nodes: []*Node{{ + Name: "int", + to: 4, + }}, + }, { + msg: 
"float, 0.", + text: "0.", + nodes: []*Node{{ + Name: "float", + to: 2, + }}, + }, { + msg: "float, 72.40", + text: "72.40", + nodes: []*Node{{ + Name: "float", + to: 5, + }}, + }, { + msg: "float, 072.40", + text: "072.40", + nodes: []*Node{{ + Name: "float", + to: 6, + }}, + }, { + msg: "float, 2.71828", + text: "2.71828", + nodes: []*Node{{ + Name: "float", + to: 7, + }}, + }, { + msg: "float, 6.67428e-11", + text: "6.67428e-11", + nodes: []*Node{{ + Name: "float", + to: 11, + }}, + }, { + msg: "float, 1E6", + text: "1E6", + nodes: []*Node{{ + Name: "float", + to: 3, + }}, + }, { + msg: "float, .25", + text: ".25", + nodes: []*Node{{ + Name: "float", + to: 3, + }}, + }, { + msg: "float, .12345E+5", + text: ".12345E+5", + nodes: []*Node{{ + Name: "float", + to: 9, + }}, + }, { + msg: "string, empty", + text: "\"\"", + nodes: []*Node{{ + Name: "string", + to: 2, + }}, + }, { + msg: "string", + text: "\"foo\"", + nodes: []*Node{{ + Name: "string", + to: 5, + }}, + }, { + msg: "string, with new line", + text: "\"foo\nbar\"", + nodes: []*Node{{ + Name: "string", + to: 9, + }}, + }, { + msg: "string, with escaped new line", + text: "\"foo\\nbar\"", + nodes: []*Node{{ + Name: "string", + to: 10, + }}, + }, { + msg: "string, with quotes", + text: "\"foo \\\"bar\\\" baz\"", + nodes: []*Node{{ + Name: "string", + to: 17, + }}, + }, { + msg: "bool, true", + text: "true", + nodes: []*Node{{ + Name: "true", + to: 4, + }}, + }, { + msg: "bool, false", + text: "false", + nodes: []*Node{{ + Name: "false", + to: 5, + }}, + }, { + msg: "symbol", + text: "foo", + nodes: []*Node{{ + Name: "symbol", + to: 3, + }}, + }, { + msg: "dynamic-symbol", + text: "symbol(a)", + nodes: []*Node{{ + Name: "dynamic-symbol", + to: 9, + Nodes: []*Node{{ + Name: "symbol", + from: 7, + to: 8, + }}, + }}, + }, { + msg: "empty list", + text: "[]", + nodes: []*Node{{ + Name: "list", + to: 2, + }}, + }, { + msg: "list", + text: "[a, b, c]", + nodes: []*Node{{ + Name: "list", + to: 9, + Nodes: []*Node{{ + 
Name: "symbol", + from: 1, + to: 2, + }, { + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 7, + to: 8, + }}, + }}, + }, { + msg: "list, new lines", + text: `[ + a + b + c + ]`, + nodes: []*Node{{ + Name: "list", + to: 20, + Nodes: []*Node{{ + Name: "symbol", + from: 5, + to: 6, + }, { + Name: "symbol", + from: 10, + to: 11, + }, { + Name: "symbol", + from: 15, + to: 16, + }}, + }}, + }, { + msg: "list, complex", + text: "[a, b, c..., [d, e], [f, [g]]...]", + nodes: []*Node{{ + Name: "list", + to: 33, + Nodes: []*Node{{ + Name: "symbol", + from: 1, + to: 2, + }, { + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "spread-expression", + from: 7, + to: 11, + Nodes: []*Node{{ + Name: "symbol", + from: 7, + to: 8, + }}, + }, { + Name: "list", + from: 13, + to: 19, + Nodes: []*Node{{ + Name: "symbol", + from: 14, + to: 15, + }, { + Name: "symbol", + from: 17, + to: 18, + }}, + }, { + Name: "spread-expression", + from: 21, + to: 32, + Nodes: []*Node{{ + Name: "list", + from: 21, + to: 29, + Nodes: []*Node{{ + Name: "symbol", + from: 22, + to: 23, + }, { + Name: "list", + from: 25, + to: 28, + Nodes: []*Node{{ + Name: "symbol", + from: 26, + to: 27, + }}, + }}, + }}, + }}, + }}, + }, { + msg: "mutable list", + text: "~[a, b, c]", + nodes: []*Node{{ + Name: "mutable-list", + to: 10, + Nodes: []*Node{{ + Name: "symbol", + from: 2, + to: 3, + }, { + Name: "symbol", + from: 5, + to: 6, + }, { + Name: "symbol", + from: 8, + to: 9, + }}, + }}, + }, { + msg: "empty struct", + text: "{}", + nodes: []*Node{{ + Name: "struct", + to: 2, + }}, + }, { + msg: "struct", + text: "{foo: 1, \"bar\": 2, symbol(baz): 3, [qux]: 4}", + nodes: []*Node{{ + Name: "struct", + to: 44, + Nodes: []*Node{{ + Name: "entry", + from: 1, + to: 7, + Nodes: []*Node{{ + Name: "symbol", + from: 1, + to: 4, + }, { + Name: "int", + from: 6, + to: 7, + }}, + }, { + Name: "entry", + from: 9, + to: 17, + Nodes: []*Node{{ + Name: "string", + from: 9, + to: 14, + }, { + Name: "int", + from: 
16, + to: 17, + }}, + }, { + Name: "entry", + from: 19, + to: 33, + Nodes: []*Node{{ + Name: "dynamic-symbol", + from: 19, + to: 30, + Nodes: []*Node{{ + Name: "symbol", + from: 26, + to: 29, + }}, + }, { + Name: "int", + from: 32, + to: 33, + }}, + }, { + Name: "entry", + from: 35, + to: 43, + Nodes: []*Node{{ + Name: "indexer-symbol", + from: 35, + to: 40, + Nodes: []*Node{{ + Name: "symbol", + from: 36, + to: 39, + }}, + }, { + Name: "int", + from: 42, + to: 43, + }}, + }}, + }}, + }, { + msg: "struct, complex", + text: "{foo: 1, {bar: 2}..., {baz: {}}...}", + nodes: []*Node{{ + Name: "struct", + to: 35, + Nodes: []*Node{{ + Name: "entry", + from: 1, + to: 7, + Nodes: []*Node{{ + Name: "symbol", + from: 1, + to: 4, + }, { + Name: "int", + from: 6, + to: 7, + }}, + }, { + Name: "spread-expression", + from: 9, + to: 20, + Nodes: []*Node{{ + Name: "struct", + from: 9, + to: 17, + Nodes: []*Node{{ + Name: "entry", + from: 10, + to: 16, + Nodes: []*Node{{ + Name: "symbol", + from: 10, + to: 13, + }, { + Name: "int", + from: 15, + to: 16, + }}, + }}, + }}, + }, { + Name: "spread-expression", + from: 22, + to: 34, + Nodes: []*Node{{ + Name: "struct", + from: 22, + to: 31, + Nodes: []*Node{{ + Name: "entry", + from: 23, + to: 30, + Nodes: []*Node{{ + Name: "symbol", + from: 23, + to: 26, + }, { + Name: "struct", + from: 28, + to: 30, + }}, + }}, + }}, + }}, + }}, + }, { + msg: "struct with indexer key", + text: "{[a]: b}", + nodes: []*Node{{ + Name: "struct", + to: 8, + Nodes: []*Node{{ + Name: "entry", + from: 1, + to: 7, + Nodes: []*Node{{ + Name: "indexer-symbol", + from: 1, + to: 4, + Nodes: []*Node{{ + Name: "symbol", + from: 2, + to: 3, + }}, + }, { + Name: "symbol", + from: 6, + to: 7, + }}, + }}, + }}, + }, { + msg: "mutable struct", + text: "~{foo: 1}", + nodes: []*Node{{ + Name: "mutable-struct", + to: 9, + Nodes: []*Node{{ + Name: "entry", + from: 2, + to: 8, + Nodes: []*Node{{ + Name: "symbol", + from: 2, + to: 5, + }, { + Name: "int", + from: 7, + to: 8, + 
}}, + }}, + }}, + }, { + msg: "channel", + text: "<>", + nodes: []*Node{{ + Name: "channel", + to: 2, + }}, + }, { + msg: "buffered channel", + text: "<42>", + nodes: []*Node{{ + Name: "channel", + to: 4, + Nodes: []*Node{{ + Name: "int", + from: 1, + to: 3, + }}, + }}, + }, { + msg: "and expression", + text: "and(a, b, c)", + nodes: []*Node{{ + Name: "function-application", + to: 12, + Nodes: []*Node{{ + Name: "symbol", + to: 3, + }, { + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 7, + to: 8, + }, { + Name: "symbol", + from: 10, + to: 11, + }}, + }}, + }, { + msg: "or expression", + text: "or(a, b, c)", + nodes: []*Node{{ + Name: "function-application", + to: 11, + Nodes: []*Node{{ + Name: "symbol", + to: 2, + }, { + Name: "symbol", + from: 3, + to: 4, + }, { + Name: "symbol", + from: 6, + to: 7, + }, { + Name: "symbol", + from: 9, + to: 10, + }}, + }}, + }, { + msg: "function", + text: "fn () 42", + nodes: []*Node{{ + Name: "function", + to: 8, + Nodes: []*Node{{ + Name: "int", + from: 6, + to: 8, + }}, + }}, + }, { + msg: "function, noop", + text: "fn () {;}", + nodes: []*Node{{ + Name: "function", + to: 9, + Nodes: []*Node{{ + Name: "block", + from: 6, + to: 9, + }}, + }}, + }, { + msg: "function with args", + text: "fn (a, b, c) [a, b, c]", + nodes: []*Node{{ + Name: "function", + to: 22, + Nodes: []*Node{{ + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 7, + to: 8, + }, { + Name: "symbol", + from: 10, + to: 11, + }, { + Name: "list", + from: 13, + to: 22, + Nodes: []*Node{{ + Name: "symbol", + from: 14, + to: 15, + }, { + Name: "symbol", + from: 17, + to: 18, + }, { + Name: "symbol", + from: 20, + to: 21, + }}, + }}, + }}, + }, { + msg: "function with args in new lines", + text: `fn ( + a + b + c + ) [a, b, c]`, + nodes: []*Node{{ + Name: "function", + to: 33, + Nodes: []*Node{{ + Name: "symbol", + from: 8, + to: 9, + }, { + Name: "symbol", + from: 13, + to: 14, + }, { + Name: "symbol", + from: 18, + to: 19, + 
}, { + Name: "list", + from: 24, + to: 33, + Nodes: []*Node{{ + Name: "symbol", + from: 25, + to: 26, + }, { + Name: "symbol", + from: 28, + to: 29, + }, { + Name: "symbol", + from: 31, + to: 32, + }}, + }}, + }}, + }, { + msg: "function with spread arg", + text: "fn (a, b, ...c) [a, b, c]", + nodes: []*Node{{ + Name: "function", + to: 25, + Nodes: []*Node{{ + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 7, + to: 8, + }, { + Name: "collect-symbol", + from: 10, + to: 14, + Nodes: []*Node{{ + Name: "symbol", + from: 13, + to: 14, + }}, + }, { + Name: "list", + from: 16, + to: 25, + Nodes: []*Node{{ + Name: "symbol", + from: 17, + to: 18, + }, { + Name: "symbol", + from: 20, + to: 21, + }, { + Name: "symbol", + from: 23, + to: 24, + }}, + }}, + }}, + }, { + msg: "effect", + text: "fn ~ () 42", + nodes: []*Node{{ + Name: "effect", + to: 10, + Nodes: []*Node{{ + Name: "int", + from: 8, + to: 10, + }}, + }}, + }, { + msg: "indexer", + text: "a[42]", + nodes: []*Node{{ + Name: "indexer", + to: 5, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "int", + from: 2, + to: 4, + }}, + }}, + }, { + msg: "range indexer", + text: "a[3:9]", + nodes: []*Node{{ + Name: "indexer", + to: 6, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "range-from", + from: 2, + to: 3, + Nodes: []*Node{{ + Name: "int", + from: 2, + to: 3, + }}, + }, { + Name: "range-to", + from: 4, + to: 5, + Nodes: []*Node{{ + Name: "int", + from: 4, + to: 5, + }}, + }}, + }}, + }, { + msg: "range indexer, lower unbound", + text: "a[:9]", + nodes: []*Node{{ + Name: "indexer", + to: 5, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "range-to", + from: 3, + to: 4, + Nodes: []*Node{{ + Name: "int", + from: 3, + to: 4, + }}, + }}, + }}, + }, { + msg: "range indexer, upper unbound", + text: "a[3:]", + nodes: []*Node{{ + Name: "indexer", + to: 5, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "range-from", + from: 2, + to: 3, + Nodes: []*Node{{ + 
Name: "int", + from: 2, + to: 3, + }}, + }}, + }}, + }, { + msg: "indexer, chained", + text: "a[b][c][d]", + nodes: []*Node{{ + Name: "indexer", + to: 10, + Nodes: []*Node{{ + Name: "indexer", + to: 7, + Nodes: []*Node{{ + Name: "indexer", + to: 4, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }}, + }, { + Name: "symbol", + from: 5, + to: 6, + }}, + }, { + Name: "symbol", + from: 8, + to: 9, + }}, + }}, + }, { + msg: "symbol indexer", + text: "a.b", + nodes: []*Node{{ + Name: "indexer", + to: 3, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }}, + }}, + }, { + msg: "symbol indexer, with string", + text: "a.\"b\"", + nodes: []*Node{{ + Name: "indexer", + to: 5, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "string", + from: 2, + to: 5, + }}, + }}, + }, { + msg: "symbol indexer, with dynamic symbol", + text: "a.symbol(b)", + nodes: []*Node{{ + Name: "indexer", + to: 11, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "dynamic-symbol", + from: 2, + to: 11, + Nodes: []*Node{{ + Name: "symbol", + from: 9, + to: 10, + }}, + }}, + }}, + }, { + msg: "chained symbol indexer", + text: "a.b.c.d", + nodes: []*Node{{ + Name: "indexer", + to: 7, + Nodes: []*Node{{ + Name: "indexer", + to: 5, + Nodes: []*Node{{ + Name: "indexer", + to: 3, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }}, + }, { + Name: "symbol", + from: 4, + to: 5, + }}, + }, { + Name: "symbol", + from: 6, + to: 7, + }}, + }}, + }, { + msg: "chained symbol indexer on new line", + text: "a\n.b\n.c", + nodes: []*Node{{ + Name: "indexer", + to: 7, + Nodes: []*Node{{ + Name: "indexer", + to: 4, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 3, + to: 4, + }}, + }, { + Name: "symbol", + from: 6, + to: 7, + }}, + }}, + }, { + msg: "chained symbol indexer on new line after dot", + text: "a.\nb.\nc", + nodes: []*Node{{ + 
Name: "indexer", + to: 7, + Nodes: []*Node{{ + Name: "indexer", + to: 4, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 3, + to: 4, + }}, + }, { + Name: "symbol", + from: 6, + to: 7, + }}, + }}, + }, { + msg: "function application", + text: "f()", + nodes: []*Node{{ + Name: "function-application", + to: 3, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }}, + }}, + }, { + msg: "function application, single arg", + text: "f(a)", + nodes: []*Node{{ + Name: "function-application", + to: 4, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }}, + }}, + }, { + msg: "function application, multiple args", + text: "f(a, b, c)", + nodes: []*Node{{ + Name: "function-application", + to: 10, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }, { + Name: "symbol", + from: 5, + to: 6, + }, { + Name: "symbol", + from: 8, + to: 9, + }}, + }}, + }, { + msg: "function application, multiple args, new line", + text: "f(a\nb\nc\n)", + nodes: []*Node{{ + Name: "function-application", + to: 9, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }, { + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 6, + to: 7, + }}, + }}, + }, { + msg: "function application, spread", + text: "f(a, b..., c, d...)", + nodes: []*Node{{ + Name: "function-application", + to: 19, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }, { + Name: "spread-expression", + from: 5, + to: 9, + Nodes: []*Node{{ + Name: "symbol", + from: 5, + to: 6, + }}, + }, { + Name: "symbol", + from: 11, + to: 12, + }, { + Name: "spread-expression", + from: 14, + to: 18, + Nodes: []*Node{{ + Name: "symbol", + from: 14, + to: 15, + }}, + }}, + }}, + }, { + msg: "chained function application", + text: "f(a)(b)(c)", + nodes: []*Node{{ + Name: "function-application", + to: 10, + Nodes: []*Node{{ + Name: 
"function-application", + to: 7, + Nodes: []*Node{{ + Name: "function-application", + to: 4, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 2, + to: 3, + }}, + }, { + Name: "symbol", + from: 5, + to: 6, + }}, + }, { + Name: "symbol", + from: 8, + to: 9, + }}, + }}, + }, { + msg: "embedded function application", + text: "f(g(h(a)))", + nodes: []*Node{{ + Name: "function-application", + to: 10, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "function-application", + from: 2, + to: 9, + Nodes: []*Node{{ + Name: "symbol", + from: 2, + to: 3, + }, { + Name: "function-application", + from: 4, + to: 8, + Nodes: []*Node{{ + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 6, + to: 7, + }}, + }}, + }}, + }}, + }, { + msg: "if", + text: "if a { b() }", + nodes: []*Node{{ + Name: "if", + to: 12, + Nodes: []*Node{{ + Name: "symbol", + from: 3, + to: 4, + }, { + Name: "block", + from: 5, + to: 12, + Nodes: []*Node{{ + Name: "function-application", + from: 7, + to: 10, + Nodes: []*Node{{ + Name: "symbol", + from: 7, + to: 8, + }}, + }}, + }}, + }}, + }, { + msg: "if, else", + text: "if a { b } else { c }", + nodes: []*Node{{ + Name: "if", + to: 21, + Nodes: []*Node{{ + Name: "symbol", + from: 3, + to: 4, + }, { + Name: "block", + from: 5, + to: 10, + Nodes: []*Node{{ + Name: "symbol", + from: 7, + to: 8, + }}, + }, { + Name: "block", + from: 16, + to: 21, + Nodes: []*Node{{ + Name: "symbol", + from: 18, + to: 19, + }}, + }}, + }}, + }, { + msg: "if, else if, else if, else", + text: ` + if a { b } + else if c { d } + else if e { f } + else { g } + `, + nodes: []*Node{{ + Name: "if", + from: 4, + to: 66, + Nodes: []*Node{{ + Name: "symbol", + from: 7, + to: 8, + }, { + Name: "block", + from: 9, + to: 14, + Nodes: []*Node{{ + Name: "symbol", + from: 11, + to: 12, + }}, + }, { + Name: "symbol", + from: 26, + to: 27, + }, { + Name: "block", + from: 28, + to: 33, + Nodes: []*Node{{ + Name: "symbol", + from: 30, + to: 
31, + }}, + }, { + Name: "symbol", + from: 45, + to: 46, + }, { + Name: "block", + from: 47, + to: 52, + Nodes: []*Node{{ + Name: "symbol", + from: 49, + to: 50, + }}, + }, { + Name: "block", + from: 61, + to: 66, + Nodes: []*Node{{ + Name: "symbol", + from: 63, + to: 64, + }}, + }}, + }}, + }, { + msg: "switch, empty", + text: "switch {default:}", + nodes: []*Node{{ + Name: "switch", + to: 17, + Nodes: []*Node{{ + Name: "default", + from: 8, + to: 16, + }}, + }}, + }, { + msg: "switch, single case", + text: "switch a {case b: c}", + nodes: []*Node{{ + Name: "switch", + to: 20, + Nodes: []*Node{{ + Name: "symbol", + from: 7, + to: 8, + }, { + Name: "case", + from: 10, + to: 17, + Nodes: []*Node{{ + Name: "symbol", + from: 15, + to: 16, + }}, + }, { + Name: "symbol", + from: 18, + to: 19, + }}, + }}, + }, { + msg: "switch", + text: "switch a {case b: c; case d: e; default: f}", + nodes: []*Node{{ + Name: "switch", + to: 43, + Nodes: []*Node{{ + Name: "symbol", + from: 7, + to: 8, + }, { + Name: "case", + from: 10, + to: 17, + Nodes: []*Node{{ + Name: "symbol", + from: 15, + to: 16, + }}, + }, { + Name: "symbol", + from: 18, + to: 19, + }, { + Name: "case", + from: 21, + to: 28, + Nodes: []*Node{{ + Name: "symbol", + from: 26, + to: 27, + }}, + }, { + Name: "symbol", + from: 29, + to: 30, + }, { + Name: "default", + from: 32, + to: 40, + }, { + Name: "symbol", + from: 41, + to: 42, + }}, + }}, + }, { + msg: "switch, all new lines", + text: `switch + a + { + case + b + : + c + case + d + : + e + default + : + f + }`, + nodes: []*Node{{ + Name: "switch", + to: 87, + Nodes: []*Node{{ + Name: "symbol", + from: 10, + to: 11, + }, { + Name: "case", + from: 20, + to: 34, + Nodes: []*Node{{ + Name: "symbol", + from: 28, + to: 29, + }}, + }, { + Name: "symbol", + from: 38, + to: 39, + }, { + Name: "case", + from: 43, + to: 57, + Nodes: []*Node{{ + Name: "symbol", + from: 51, + to: 52, + }}, + }, { + Name: "symbol", + from: 61, + to: 62, + }, { + Name: "default", + from: 66, + 
to: 78, + }, { + Name: "symbol", + from: 82, + to: 83, + }}, + }}, + }, { + msg: "match expression, empty", + text: "match a {}", + nodes: []*Node{{ + Name: "match", + to: 10, + Nodes: []*Node{{ + Name: "symbol", + from: 6, + to: 7, + }}, + }}, + }, { + msg: "match expression", + text: `match a { + case [first, ...rest]: first + }`, + nodes: []*Node{{ + Name: "match", + to: 45, + Nodes: []*Node{{ + Name: "symbol", + from: 6, + to: 7, + }, { + Name: "match-case", + from: 13, + to: 35, + Nodes: []*Node{{ + Name: "list-type", + from: 18, + to: 34, + Nodes: []*Node{{ + Name: "list-destructure-type", + from: 19, + to: 33, + Nodes: []*Node{{ + Name: "destructure-item", + from: 19, + to: 24, + Nodes: []*Node{{ + Name: "symbol", + from: 19, + to: 24, + }}, + }, { + Name: "collect-destructure-item", + from: 26, + to: 33, + Nodes: []*Node{{ + Name: "destructure-item", + from: 29, + to: 33, + Nodes: []*Node{{ + Name: "symbol", + from: 29, + to: 33, + }}, + }}, + }}, + }}, + }}, + }, { + Name: "symbol", + from: 36, + to: 41, + }}, + }}, + }, { + msg: "match expression, multiple cases", + text: `match a { + case [0]: [] + case [2:]: a[2:] + default: error("invalid length") + }`, + nodes: []*Node{{ + Name: "match", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "match-case", + Nodes: []*Node{{ + Name: "list-type", + Nodes: []*Node{{ + Name: "items-type", + Nodes: []*Node{{ + Name: "items-quantifier", + Nodes: []*Node{{ + Name: "int", + }}, + }}, + }}, + }}, + }, { + Name: "list", + }, { + Name: "match-case", + Nodes: []*Node{{ + Name: "list-type", + Nodes: []*Node{{ + Name: "items-type", + Nodes: []*Node{{ + Name: "items-quantifier", + Nodes: []*Node{{ + Name: "static-range-from", + Nodes: []*Node{{ + Name: "int", + }}, + }}, + }}, + }}, + }}, + }, { + Name: "indexer", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "range-from", + Nodes: []*Node{{ + Name: "int", + }}, + }}, + }, { + Name: "default", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: 
"symbol", + }, { + Name: "string", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "match function", + text: `match a { + case fn () int: a() + default: 42 + }`, + nodes: []*Node{{ + Name: "match", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "match-case", + Nodes: []*Node{{ + Name: "function-type", + Nodes: []*Node{{ + Name: "int-type", + }}, + }}, + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "default", + }, { + Name: "int", + }}, + }}, + ignorePosition: true, + }, { + msg: "match expression, combined", + text: `match a { + case [fn (int)]: a[0]() + default: 42 + }`, + nodes: []*Node{{ + Name: "match", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "match-case", + Nodes: []*Node{{ + Name: "list-type", + Nodes: []*Node{{ + Name: "items-type", + Nodes: []*Node{{ + Name: "function-type", + Nodes: []*Node{{ + Name: "arg-type", + Nodes: []*Node{{ + Name: "int-type", + }}, + }}, + }}, + }}, + }}, + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "indexer", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "int", + }}, + }}, + }, { + Name: "default", + }, { + Name: "int", + }}, + }}, + ignorePosition: true, + }, { + msg: "match expression, complex", + text: `match a { + case [first T int|string, op fn ([T, int, ...T]) int, ...rest T]: + op([first, now(), rest...]) + default: + error("invalid list") + }`, + nodes: []*Node{{ + Name: "match", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "match-case", + Nodes: []*Node{{ + Name: "list-match", + Nodes: []*Node{{ + Name: "list-destructure-match", + Nodes: []*Node{{ + Name: "destructure-match-item", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "int-type", + }, { + Name: "string-type", + }}, + }, { + Name: "destructure-match-item", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "function-type", + Nodes: []*Node{{ + Name: "arg-type", + Nodes: []*Node{{ + Name: "list-type", + Nodes: []*Node{{ + Name: 
"list-destructure-type", + Nodes: []*Node{{ + Name: "destructure-item", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "destructure-item", + Nodes: []*Node{{ + Name: "int-type", + }}, + }, { + Name: "collect-destructure-item", + Nodes: []*Node{{ + Name: "destructure-item", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + }}, + }}, + }, { + Name: "int-type", + }}, + }}, + }, { + Name: "collect-destructure-match-item", + Nodes: []*Node{{ + Name: "destructure-match-item", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + }}, + }}, + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "spread-expression", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + }, { + Name: "default", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "string", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "receive op", + text: "<-chan", + nodes: []*Node{{ + Name: "unary-expression", + Nodes: []*Node{{ + Name: "receive-op", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "send op", + text: "chan <- a", + nodes: []*Node{{ + Name: "send", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + ignorePosition: true, + }, { + msg: "select, empty", + text: `select { + }`, + nodes: []*Node{{ + Name: "select", + to: 12, + }}, + }, { + msg: "select", + text: `select { + case let a <-r: s <- a + case s <- f(): g() + default: h() + }`, + nodes: []*Node{{ + Name: "select", + Nodes: []*Node{{ + Name: "select-case", + Nodes: []*Node{{ + Name: "receive-definition", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "receive-op", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + }, { + Name: "send", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: 
"symbol", + }}, + }, { + Name: "select-case", + Nodes: []*Node{{ + Name: "send", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "default", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "select, call", + text: `select { + case let a receive(r): f() + case send(s, g()): h() + default: i() + }`, + nodes: []*Node{{ + Name: "select", + Nodes: []*Node{{ + Name: "select-case", + Nodes: []*Node{{ + Name: "receive-definition", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "receive-call", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "select-case", + Nodes: []*Node{{ + Name: "send", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "default", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "go", + text: "go f()", + nodes: []*Node{{ + Name: "go", + Nodes: []*Node{{ + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "require, dot, equal", + text: "require . 
= \"mml/foo\"", + nodes: []*Node{{ + Name: "require", + Nodes: []*Node{{ + Name: "require-fact", + Nodes: []*Node{{ + Name: "require-inline", + }, { + Name: "string", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "require, symbol, equal", + text: "require bar = \"mml/foo\"", + nodes: []*Node{{ + Name: "require", + Nodes: []*Node{{ + Name: "require-fact", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "string", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "require, symbol", + text: "require bar \"mml/foo\"", + nodes: []*Node{{ + Name: "require", + Nodes: []*Node{{ + Name: "require-fact", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "string", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "require", + text: "require \"mml/foo\"", + nodes: []*Node{{ + Name: "require", + Nodes: []*Node{{ + Name: "require-fact", + Nodes: []*Node{{ + Name: "string", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "require, group", + text: `require ( + . = "mml/foo" + bar = "mml/foo" + . 
"mml/foo" + bar "mml/foo" + "mml/foo" + )`, + nodes: []*Node{{ + Name: "require", + Nodes: []*Node{{ + Name: "require-fact", + Nodes: []*Node{{ + Name: "require-inline", + }, { + Name: "string", + }}, + }, { + Name: "require-fact", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "string", + }}, + }, { + Name: "require-fact", + Nodes: []*Node{{ + Name: "require-inline", + }, { + Name: "string", + }}, + }, { + Name: "require-fact", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "string", + }}, + }, { + Name: "require-fact", + Nodes: []*Node{{ + Name: "string", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "expression group", + text: "(fn (a) a)(a)", + nodes: []*Node{{ + Name: "function-application", + Nodes: []*Node{{ + Name: "function", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "symbol", + }}, + }}, + ignorePosition: true, + }, { + msg: "unary operator", + text: "!foo", + nodes: []*Node{{ + Name: "unary-expression", + Nodes: []*Node{{ + Name: "logical-not", + }, { + Name: "symbol", + }}, + }}, + ignorePosition: true, + }, { + msg: "binary 0", + text: "a * b", + nodes: []*Node{{ + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }}, + ignorePosition: true, + }, { + msg: "binary 1", + text: "a * b + c * d", + nodes: []*Node{{ + Name: "binary1", + Nodes: []*Node{{ + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }, { + Name: "add", + }, { + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "binary 2", + text: "a * b + c * d == e * f", + nodes: []*Node{{ + Name: "binary2", + Nodes: []*Node{{ + Name: "binary1", + Nodes: []*Node{{ + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }, { + Name: "add", + }, { + Name: "binary0", + 
Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }}, + }, { + Name: "eq", + }, { + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "binary 3, 4, 5", + text: "a * b + c * d == e * f && g || h -> f()", + nodes: []*Node{{ + Name: "binary5", + Nodes: []*Node{{ + Name: "binary4", + Nodes: []*Node{{ + Name: "binary3", + Nodes: []*Node{{ + Name: "binary2", + Nodes: []*Node{{ + Name: "binary1", + Nodes: []*Node{{ + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }, { + Name: "add", + }, { + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }}, + }, { + Name: "eq", + }, { + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mul", + }, { + Name: "symbol", + }}, + }}, + }, { + Name: "logical-and", + }, { + Name: "symbol", + }}, + }, { + Name: "logical-or", + }, { + Name: "symbol", + }}, + }, { + Name: "chain", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "ternary expression", + text: "a ? b : c", + nodes: []*Node{{ + Name: "ternary-expression", + to: 9, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 8, + to: 9, + }}, + }}, + }, { + msg: "multiple ternary expressions, consequence", + text: "a ? b ? 
c : d : e", + nodes: []*Node{{ + Name: "ternary-expression", + to: 17, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "ternary-expression", + from: 4, + to: 13, + Nodes: []*Node{{ + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "symbol", + from: 8, + to: 9, + }, { + Name: "symbol", + from: 12, + to: 13, + }}, + }, { + Name: "symbol", + from: 16, + to: 17, + }}, + }}, + }, { + msg: "multiple ternary expressions, alternative", + text: "a ? b : c ? d : e", + nodes: []*Node{{ + Name: "ternary-expression", + to: 17, + Nodes: []*Node{{ + Name: "symbol", + to: 1, + }, { + Name: "symbol", + from: 4, + to: 5, + }, { + Name: "ternary-expression", + from: 8, + to: 17, + Nodes: []*Node{{ + Name: "symbol", + from: 8, + to: 9, + }, { + Name: "symbol", + from: 12, + to: 13, + }, { + Name: "symbol", + from: 16, + to: 17, + }}, + }}, + }}, + }, { + msg: "infinite loop", + text: "for {}", + nodes: []*Node{{ + Name: "loop", + Nodes: []*Node{{ + Name: "block", + }}, + }}, + ignorePosition: true, + }, { + msg: "conditional loop", + text: "for foo {}", + nodes: []*Node{{ + Name: "loop", + Nodes: []*Node{{ + Name: "loop-expression", + Nodes: []*Node{{ + Name: "symbol", + }}, + }, { + Name: "block", + }}, + }}, + ignorePosition: true, + }, { + msg: "in list loop", + text: "for i in [1, 2, 3] {}", + nodes: []*Node{{ + Name: "loop", + Nodes: []*Node{{ + Name: "loop-expression", + Nodes: []*Node{{ + Name: "in-expression", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "list", + Nodes: []*Node{{ + Name: "int", + }, { + Name: "int", + }, { + Name: "int", + }}, + }}, + }}, + }, { + Name: "block", + }}, + }}, + ignorePosition: true, + }, { + msg: "in range loop", + text: "for i in -3:42 {}", + nodes: []*Node{{ + Name: "loop", + Nodes: []*Node{{ + Name: "loop-expression", + Nodes: []*Node{{ + Name: "in-expression", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "range-from", + Nodes: []*Node{{ + Name: "unary-expression", + Nodes: []*Node{{ + Name: "minus", + }, { + 
Name: "int", + }}, + }}, + }, { + Name: "range-to", + Nodes: []*Node{{ + Name: "int", + }}, + }}, + }}, + }, { + Name: "block", + }}, + }}, + ignorePosition: true, + }, { + msg: "loop control", + text: `for i in l { + if i % 2 == 0 { + break + } + }`, + nodes: []*Node{{ + Name: "loop", + Nodes: []*Node{{ + Name: "loop-expression", + Nodes: []*Node{{ + Name: "in-expression", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }, { + Name: "block", + Nodes: []*Node{{ + Name: "if", + Nodes: []*Node{{ + Name: "binary2", + Nodes: []*Node{{ + Name: "binary0", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "mod", + }, { + Name: "int", + }}, + }, { + Name: "eq", + }, { + Name: "int", + }}, + }, { + Name: "block", + Nodes: []*Node{{ + Name: "break", + }}, + }}, + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "assign, eq", + text: "a = b", + nodes: []*Node{{ + Name: "assignment", + Nodes: []*Node{{ + Name: "assign-equal", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "assign, set, eq", + text: "set a = b", + nodes: []*Node{{ + Name: "assignment", + Nodes: []*Node{{ + Name: "assign-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "assign, set", + text: "set a b", + nodes: []*Node{{ + Name: "assignment", + Nodes: []*Node{{ + Name: "assign-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "assign, group", + text: `set ( + a = b + c d + )`, + nodes: []*Node{{ + Name: "assignment", + Nodes: []*Node{{ + Name: "assign-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "assign-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define, eq", + text: "let a = b", + nodes: []*Node{{ + Name: 
"value-definition", + Nodes: []*Node{{ + Name: "value-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define", + text: "let a b", + nodes: []*Node{{ + Name: "value-definition", + Nodes: []*Node{{ + Name: "value-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define mutable, eq", + text: "let ~ a = b", + nodes: []*Node{{ + Name: "value-definition", + Nodes: []*Node{{ + Name: "mutable-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define mutable", + text: "let ~ a b", + nodes: []*Node{{ + Name: "value-definition", + Nodes: []*Node{{ + Name: "mutable-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "mixed define group", + text: `let ( + a = b + c d + ~ e f + ~ g h + )`, + nodes: []*Node{{ + Name: "value-definition-group", + Nodes: []*Node{{ + Name: "value-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "value-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "mutable-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "mutable-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "mutable define group", + text: `let ~ ( + a = b + c d + )`, + nodes: []*Node{{ + Name: "mutable-definition-group", + Nodes: []*Node{{ + Name: "value-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "value-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define function", + text: "fn a() b", + nodes: []*Node{{ + Name: "function-definition", + Nodes: 
[]*Node{{ + Name: "function-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define effect", + text: "fn ~ a() b", + nodes: []*Node{{ + Name: "function-definition", + Nodes: []*Node{{ + Name: "effect-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define function group", + text: `fn ( + a() b + ~ c() d + )`, + nodes: []*Node{{ + Name: "function-definition-group", + Nodes: []*Node{{ + Name: "function-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "effect-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "define effect group", + text: `fn ~ ( + a() b + c() d + )`, + nodes: []*Node{{ + Name: "effect-definition-group", + Nodes: []*Node{{ + Name: "function-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "function-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "type constraint", + text: ` + type a fn ([]) int + fn a(l) len(l) + `, + nodes: []*Node{{ + Name: "type-constraint", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "function-type", + Nodes: []*Node{{ + Name: "arg-type", + Nodes: []*Node{{ + Name: "list-type", + }}, + }, { + Name: "int-type", + }}, + }}, + }, { + Name: "function-definition", + Nodes: []*Node{{ + Name: "function-capture", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "function-application", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + }}, + ignorePosition: true, + }, { + msg: "type alias", + text: "type alias a int|(fn () int|string)|string", + nodes: []*Node{{ + Name: "type-alias", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "int-type", + }, { + Name: 
"function-type", + Nodes: []*Node{{ + Name: "int-type", + }, { + Name: "string-type", + }}, + }, { + Name: "string-type", + }}, + }}, + ignorePosition: true, + }, { + msg: "statement group", + text: "(for {})", + nodes: []*Node{{ + Name: "loop", + Nodes: []*Node{{ + Name: "block", + }}, + }}, + ignorePosition: true, + }}) +} diff --git a/next_test.go b/next_test.go new file mode 100644 index 0000000..6439f75 --- /dev/null +++ b/next_test.go @@ -0,0 +1,740 @@ +package parse + +import ( + "bytes" + "io" + "os" + "testing" + "time" +) + +type testItem struct { + msg string + text string + fail bool + node *Node + nodes []*Node + ignorePosition bool +} + +func testSyntaxReader(r io.Reader, traceLevel int) (*Syntax, error) { + trace := NewTrace(0) + + b, err := bootSyntax(trace) + if err != nil { + return nil, err + } + + doc, err := b.Parse(r) + if err != nil { + return nil, err + } + + trace = NewTrace(traceLevel) + s := NewSyntax(trace) + if err := define(s, doc); err != nil { + return nil, err + } + + if err := s.Init(); err != nil { + return nil, err + } + + return s, nil +} + +func testSyntaxString(s string, traceLevel int) (*Syntax, error) { + return testSyntaxReader(bytes.NewBufferString(s), traceLevel) +} + +func testSyntax(file string, traceLevel int) (*Syntax, error) { + f, err := os.Open(file) + if err != nil { + return nil, err + } + + defer f.Close() + return testSyntaxReader(f, traceLevel) +} + +func checkNodesPosition(t *testing.T, left, right []*Node, position bool) { + if len(left) != len(right) { + t.Error("length doesn't match", len(left), len(right)) + return + } + + for len(left) > 0 { + checkNodePosition(t, left[0], right[0], position) + if t.Failed() { + return + } + + left, right = left[1:], right[1:] + } +} + +func checkNodePosition(t *testing.T, left, right *Node, position bool) { + if (left == nil) != (right == nil) { + t.Error("nil reference doesn't match", left == nil, right == nil) + return + } + + if left == nil { + return + } + + if 
left.Name != right.Name { + t.Error("name doesn't match", left.Name, right.Name) + return + } + + if position && left.from != right.from { + t.Error("from doesn't match", left.Name, left.from, right.from) + return + } + + if position && left.to != right.to { + t.Error("to doesn't match", left.Name, left.to, right.to) + return + } + + if len(left.Nodes) != len(right.Nodes) { + t.Error("length doesn't match", left.Name, len(left.Nodes), len(right.Nodes)) + t.Log(left) + t.Log(right) + for { + if len(left.Nodes) > 0 { + t.Log("<", left.Nodes[0]) + left.Nodes = left.Nodes[1:] + } + + if len(right.Nodes) > 0 { + t.Log(">", right.Nodes[0]) + right.Nodes = right.Nodes[1:] + } + + if len(left.Nodes) == 0 && len(right.Nodes) == 0 { + break + } + } + return + } + + checkNodesPosition(t, left.Nodes, right.Nodes, position) +} + +func checkNodes(t *testing.T, left, right []*Node) { + checkNodesPosition(t, left, right, true) +} + +func checkNode(t *testing.T, left, right *Node) { + checkNodePosition(t, left, right, true) +} + +func checkNodesIgnorePosition(t *testing.T, left, right []*Node) { + checkNodesPosition(t, left, right, false) +} + +func checkNodeIgnorePosition(t *testing.T, left, right *Node) { + checkNodePosition(t, left, right, false) +} + +func testReaderTrace(t *testing.T, r io.Reader, rootName string, traceLevel int, tests []testItem) { + s, err := testSyntaxReader(r, traceLevel) + if err != nil { + t.Error(err) + return + } + + start := time.Now() + defer func() { t.Log("\ntotal duration", time.Since(start)) }() + + for _, ti := range tests { + t.Run(ti.msg, func(t *testing.T) { + n, err := s.Parse(bytes.NewBufferString(ti.text)) + + if ti.fail && err == nil { + t.Error("failed to fail") + return + } else if !ti.fail && err != nil { + t.Error(err) + return + } else if ti.fail { + return + } + + t.Log(n) + + cn := checkNode + if ti.ignorePosition { + cn = checkNodeIgnorePosition + } + + if ti.node != nil { + cn(t, n, ti.node) + } else { + cn(t, n, &Node{ + Name: 
rootName, + from: 0, + to: len(ti.text), + Nodes: ti.nodes, + }) + } + }) + } +} + +func testStringTrace(t *testing.T, s string, traceLevel int, tests []testItem) { + testReaderTrace(t, bytes.NewBufferString(s), "", traceLevel, tests) +} + +func testString(t *testing.T, s string, tests []testItem) { + testStringTrace(t, s, 0, tests) +} + +func testTrace(t *testing.T, file, rootName string, traceLevel int, tests []testItem) { + f, err := os.Open(file) + if err != nil { + t.Error(err) + return + } + + defer f.Close() + testReaderTrace(t, f, rootName, traceLevel, tests) +} + +func test(t *testing.T, file, rootName string, tests []testItem) { + testTrace(t, file, rootName, 0, tests) +} + +func TestRecursion(t *testing.T) { + testString( + t, + `A = "a" | A "a"`, + []testItem{{ + msg: "recursion in choice, right, left, commit", + text: "aaa", + node: &Node{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + }}, + }}, + }, + ignorePosition: true, + }}, + ) + + testString( + t, + `A = "a" | "a" A`, + []testItem{{ + msg: "recursion in choice, right, right, commit", + text: "aaa", + node: &Node{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + }}, + }}, + }, + ignorePosition: true, + }}, + ) + + testString( + t, + `A = "a" A | "a"`, + []testItem{{ + msg: "recursion in choice, left, right, commit", + text: "aaa", + node: &Node{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + }}, + }}, + }, + ignorePosition: true, + }}, + ) + + testString( + t, + `A = A "a" | "a"`, + []testItem{{ + msg: "recursion in choice, left, left, commit", + text: "aaa", + node: &Node{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + }}, + }}, + }, + ignorePosition: true, + }}, + ) + + testString( + t, + `A':alias = "a" | A' "a"; A = A'`, + []testItem{{ + msg: "recursion in choice, right, left, alias", + text: "aaa", + node: &Node{ + Name: "A", + to: 3, + }, + }}, + ) + + testString( + 
t, + `A':alias = "a" | "a" A'; A = A'`, + []testItem{{ + msg: "recursion in choice, right, right, alias", + text: "aaa", + node: &Node{ + Name: "A", + to: 3, + }, + }}, + ) + + testString( + t, + `A':alias = "a" A' | "a"; A = A'`, + []testItem{{ + msg: "recursion in choice, left, right, alias", + text: "aaa", + node: &Node{ + Name: "A", + to: 3, + }, + }}, + ) + + testString( + t, + `A':alias = A' "a" | "a"; A = A'`, + []testItem{{ + msg: "recursion in choice, left, left, alias", + text: "aaa", + node: &Node{ + Name: "A", + to: 3, + }, + }}, + ) +} + +func TestSequence(t *testing.T) { + testString( + t, + `AB = "a" | "a"? "a"? "b" "b"`, + []testItem{{ + msg: "sequence with optional items", + text: "abb", + node: &Node{ + Name: "AB", + to: 3, + }, + }, { + msg: "sequence with optional items, none", + text: "bb", + node: &Node{ + Name: "AB", + to: 2, + }, + }}, + ) + + testString( + t, + `A = "a" | (A?)*`, + []testItem{{ + msg: "sequence in choice with redundant quantifier", + text: "aaa", + node: &Node{ + Name: "A", + Nodes: []*Node{{ + Name: "A", + }, { + Name: "A", + }, { + Name: "A", + }}, + }, + ignorePosition: true, + }}, + ) + + testString( + t, + `A = ("a"*)*`, + []testItem{{ + msg: "sequence with redundant quantifier", + text: "aaa", + node: &Node{ + Name: "A", + to: 3, + }, + }}, + ) +} + +func TestQuantifiers(t *testing.T) { + testString( + t, + `A = "a" "b"{0} "a"`, + []testItem{{ + msg: "zero", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero, fail", + text: "aba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{1} "a"`, + []testItem{{ + msg: "one, missing", + text: "aa", + fail: true, + }, { + msg: "one", + text: "aba", + node: &Node{ + Name: "A", + to: 3, + }, + }, { + msg: "one, too much", + text: "abba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{3} "a"`, + []testItem{{ + msg: "three, missing", + text: "abba", + fail: true, + }, { + msg: "three", + text: "abbba", + node: &Node{ + Name: "A", + 
to: 5, + }, + }, { + msg: "three, too much", + text: "abbbba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{0,1} "a"`, + []testItem{{ + msg: "zero or one explicit, missing", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero or one explicit", + text: "aba", + node: &Node{ + Name: "A", + to: 3, + }, + }, { + msg: "zero or one explicit, too much", + text: "abba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{,1} "a"`, + []testItem{{ + msg: "zero or one explicit, omit zero, missing", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero or one explicit, omit zero", + text: "aba", + node: &Node{ + Name: "A", + to: 3, + }, + }, { + msg: "zero or one explicit, omit zero, too much", + text: "abba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"? "a"`, + []testItem{{ + msg: "zero or one explicit, shortcut, missing", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero or one explicit, shortcut", + text: "aba", + node: &Node{ + Name: "A", + to: 3, + }, + }, { + msg: "zero or one explicit, shortcut, too much", + text: "abba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{0,3} "a"`, + []testItem{{ + msg: "zero or three, missing", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero or three", + text: "abba", + node: &Node{ + Name: "A", + to: 4, + }, + }, { + msg: "zero or three", + text: "abbba", + node: &Node{ + Name: "A", + to: 5, + }, + }, { + msg: "zero or three, too much", + text: "abbbba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{,3} "a"`, + []testItem{{ + msg: "zero or three, omit zero, missing", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero or three, omit zero", + text: "abba", + node: &Node{ + Name: "A", + to: 4, + }, + }, { + msg: "zero or three, omit zero", + text: "abbba", + node: &Node{ + Name: "A", + to: 5, + }, + }, { + msg: "zero or three, omit zero, too much", + 
text: "abbbba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{1,3} "a"`, + []testItem{{ + msg: "one or three, missing", + text: "aa", + fail: true, + }, { + msg: "one or three", + text: "abba", + node: &Node{ + Name: "A", + to: 4, + }, + }, { + msg: "one or three", + text: "abbba", + node: &Node{ + Name: "A", + to: 5, + }, + }, { + msg: "one or three, too much", + text: "abbbba", + fail: true, + }}, + ) + + testString( + t, + `A = "a" "b"{3,5} "a"`, + []testItem{{ + msg: "three or five, missing", + text: "abba", + fail: true, + }, { + msg: "three or five", + text: "abbbba", + node: &Node{ + Name: "A", + to: 6, + }, + }, { + msg: "three or five", + text: "abbbbba", + node: &Node{ + Name: "A", + to: 7, + }, + }, { + msg: "three or five, too much", + text: "abbbbbba", + fail: true, + }}, + ) + + testStringTrace( + t, + `A = "a" "b"{0,} "a"`, + 1, + []testItem{{ + msg: "zero or more, explicit, missing", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero or more, explicit", + text: "abba", + node: &Node{ + Name: "A", + to: 4, + }, + }}, + ) + + testStringTrace( + t, + `A = "a" "b"* "a"`, + 1, + []testItem{{ + msg: "zero or more, shortcut, missing", + text: "aa", + node: &Node{ + Name: "A", + to: 2, + }, + }, { + msg: "zero or more, shortcut", + text: "abba", + node: &Node{ + Name: "A", + to: 4, + }, + }}, + ) + + testStringTrace( + t, + `A = "a" "b"{1,} "a"`, + 1, + []testItem{{ + msg: "one or more, explicit, missing", + text: "aa", + fail: true, + }, { + msg: "one or more, explicit", + text: "abba", + node: &Node{ + Name: "A", + to: 4, + }, + }}, + ) + + testStringTrace( + t, + `A = "a" "b"+ "a"`, + 1, + []testItem{{ + msg: "one or more, shortcut, missing", + text: "aa", + fail: true, + }, { + msg: "one or more, shortcut", + text: "abba", + node: &Node{ + Name: "A", + to: 4, + }, + }}, + ) + + testStringTrace( + t, + `A = "a" "b"{3,} "a"`, + 1, + []testItem{{ + msg: "three or more, explicit, missing", + text: "abba", + fail: true, 
+ }, { + msg: "three or more, explicit", + text: "abbbba", + node: &Node{ + Name: "A", + to: 6, + }, + }}, + ) +} diff --git a/node.go b/node.go new file mode 100644 index 0000000..438e334 --- /dev/null +++ b/node.go @@ -0,0 +1,89 @@ +package parse + +import "fmt" + +type Node struct { + Name string + Nodes []*Node + commitType CommitType + from, to int + tokens []rune +} + +func newNode(name string, ct CommitType, from, to int) *Node { + return &Node{ + Name: name, + commitType: ct, + from: from, + to: to, + } +} + +func (n *Node) tokenLength() int { + return n.to - n.from +} + +func (n *Node) nodeLength() int { + return len(n.Nodes) +} + +func findNode(in, n *Node) { + if n == in { + panic(fmt.Errorf("found self in %s", in.Name)) + } + + for _, ni := range n.Nodes { + findNode(in, ni) + } +} + +func (n *Node) append(p *Node) { + findNode(n, p) + n.Nodes = append(n.Nodes, p) + // TODO: check rather if n.from <= p.from??? or panic if less? or check rather node length and commit + // happens in the end anyway? + if n.from == 0 && n.to == 0 { + n.from = p.from + } + + n.to = p.to +} + +func (n *Node) clear() { + n.from = 0 + n.to = 0 + n.Nodes = nil +} + +func (n *Node) applyTokens(t []rune) { + n.tokens = t + for _, ni := range n.Nodes { + ni.applyTokens(t) + } +} + +func (n *Node) commit() { + var nodes []*Node + for _, ni := range n.Nodes { + ni.commit() + if ni.commitType&Alias != 0 { + nodes = append(nodes, ni.Nodes...) 
+ } else { + nodes = append(nodes, ni) + } + } + + n.Nodes = nodes +} + +func (n *Node) String() string { + if n.from >= len(n.tokens) || n.to > len(n.tokens) { + return n.Name + ":incomplete" + } + + return fmt.Sprintf("%s:%d:%d:%s", n.Name, n.from, n.to, n.Text()) +} + +func (n *Node) Text() string { + return string(n.tokens[n.from:n.to]) +} diff --git a/parse.go b/parse.go new file mode 100644 index 0000000..4929ab2 --- /dev/null +++ b/parse.go @@ -0,0 +1,69 @@ +package parse + +import ( + "errors" + "fmt" +) + +type definition interface { + nodeName() string + parser(*registry, []string) (parser, error) + commitType() CommitType +} + +type parser interface { + nodeName() string + setIncludedBy(parser, []string) + cacheIncluded(*context, *Node) + parse(Trace, *context) +} + +var errCannotIncludeParsers = errors.New("cannot include parsers") + +func parserNotFound(name string) error { + return fmt.Errorf("parser not found: %s", name) +} + +func stringsContain(ss []string, s string) bool { + for _, si := range ss { + if si == s { + return true + } + } + + return false +} + +func copyIncludes(to, from map[string]CommitType) { + if from == nil { + return + } + + for name, ct := range from { + to[name] = ct + } +} + +func mergeIncludes(left, right map[string]CommitType) map[string]CommitType { + m := make(map[string]CommitType) + copyIncludes(m, left) + copyIncludes(m, right) + return m +} + +func parse(t Trace, p parser, c *context) (*Node, error) { + p.parse(t, c) + if c.readErr != nil { + return nil, c.readErr + } + + if !c.match { + return nil, ErrInvalidInput + } + + if err := c.finalize(); err != nil { + return nil, err + } + + return c.node, nil +} diff --git a/quantifier.go b/quantifier.go new file mode 100644 index 0000000..e18606c --- /dev/null +++ b/quantifier.go @@ -0,0 +1,172 @@ +package parse + +type quantifierDefinition struct { + name string + commit CommitType + min, max int + item string +} + +type quantifierParser struct { + name string + commit 
CommitType + min, max int + item parser + includedBy []parser +} + +func newQuantifier(name string, ct CommitType, item string, min, max int) *quantifierDefinition { + return &quantifierDefinition{ + name: name, + commit: ct, + min: min, + max: max, + item: item, + } +} + +func (d *quantifierDefinition) nodeName() string { return d.name } + +func (d *quantifierDefinition) parser(r *registry, path []string) (parser, error) { + if stringsContain(path, d.name) { + panic(errCannotIncludeParsers) + } + + p, ok := r.parser(d.name) + if ok { + return p, nil + } + + qp := &quantifierParser{ + name: d.name, + commit: d.commit, + min: d.min, + max: d.max, + } + + r.setParser(qp) + + item, ok := r.parser(d.item) + if !ok { + itemDefinition, ok := r.definition(d.item) + if !ok { + return nil, parserNotFound(d.item) + } + + var err error + item, err = itemDefinition.parser(r, path) + if err != nil { + return nil, err + } + } + + qp.item = item + return qp, nil +} + +func (d *quantifierDefinition) commitType() CommitType { return d.commit } +func (p *quantifierParser) nodeName() string { return p.name } + +// TODO: merge the quantifier into the sequence +// DOC: sequences are hungry and are not revisited, a*a cannot match anything. +// DOC: how to match a tailing a? 
(..)*a | .(..)*a + +func (p *quantifierParser) setIncludedBy(i parser, path []string) { + if stringsContain(path, p.name) { + panic(errCannotIncludeParsers) + } + + p.includedBy = append(p.includedBy, i) +} + +func (p *quantifierParser) cacheIncluded(*context, *Node) { + panic(errCannotIncludeParsers) +} + +func (p *quantifierParser) parse(t Trace, c *context) { + t = t.Extend(p.name) + t.Out1("parsing quantifier", c.offset) + + if p.commit&Documentation != 0 { + t.Out1("fail, doc") + c.fail(c.offset) + return + } + + if c.excluded(c.offset, p.name) { + t.Out1("excluded") + c.fail(c.offset) + return + } + + c.exclude(c.offset, p.name) + defer c.include(c.offset, p.name) + + node := newNode(p.name, p.commit, c.offset, c.offset) + + // this way of checking the cache definitely needs the testing of the russ cox form + for { + if p.max >= 0 && node.nodeLength() == p.max { + t.Out1("success, max reached") + c.cache.set(node.from, p.name, node) + for _, i := range p.includedBy { + i.cacheIncluded(c, node) + } + + c.success(node) + return + } + + t.Out2("next quantifier item") + + // n, m, ok := c.cache.get(c.offset, p.item.nodeName()) + m, ok := c.fromCache(p.item.nodeName()) + if ok { + t.Out1("quantifier item found in cache, match:", m, c.offset, c.node.tokenLength()) + if m { + node.append(c.node) + if c.node.tokenLength() > 0 { + t.Out2("taking next after cached found") + continue + } + } + + if node.nodeLength() >= p.min { + t.Out1("success, no more match") + c.cache.set(node.from, p.name, node) + for _, i := range p.includedBy { + i.cacheIncluded(c, node) + } + + c.success(node) + } else { + t.Out1("fail, min not reached") + c.cache.set(node.from, p.name, nil) + c.fail(node.from) + } + + return + } + + p.item.parse(t, c) + if !c.match || c.node.tokenLength() == 0 { + if node.nodeLength() >= p.min { + t.Out1("success, no more match") + c.cache.set(node.from, p.name, node) + for _, i := range p.includedBy { + i.cacheIncluded(c, node) + } + + c.success(node) + } else 
{ + t.Out1("fail, min not reached") + c.cache.set(node.from, p.name, nil) + c.fail(node.from) + } + + return + } + + node.append(c.node) + } +} diff --git a/registry.go b/registry.go new file mode 100644 index 0000000..09160ad --- /dev/null +++ b/registry.go @@ -0,0 +1,36 @@ +package parse + +type registry struct { + definitions map[string]definition + parsers map[string]parser +} + +func newRegistry() *registry { + return ®istry{ + definitions: make(map[string]definition), + parsers: make(map[string]parser), + } +} + +func (r *registry) definition(name string) (definition, bool) { + d, ok := r.definitions[name] + return d, ok +} + +func (r *registry) parser(name string) (parser, bool) { + p, ok := r.parsers[name] + return p, ok +} + +func (r *registry) setDefinition(d definition) error { + if _, ok := r.definitions[d.nodeName()]; ok { + return duplicateDefinition(d.nodeName()) + } + + r.definitions[d.nodeName()] = d + return nil +} + +func (r *registry) setParser(p parser) { + r.parsers[p.nodeName()] = p +} diff --git a/scheme.p b/scheme.p new file mode 100644 index 0000000..28cf423 --- /dev/null +++ b/scheme.p @@ -0,0 +1,14 @@ +// TODO: comment + +ws:alias = [ \b\f\n\r\t\v]; +comment:alias = ";" [^\n]*; +wsc:alias = ws | comment; +number = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? 
[0-9]+)?; +string = "\"" ([^\\"] | "\\" .)* "\""; +symbol = ([^\\ \n\t\b\f\r\v\"()\[\]#] | "\\" .)+; +list-form:alias = "(" wsc* (expression wsc*)* ")" + | "[" wsc* (expression wsc*)* "]"; +list = list-form; +vector = "#" list-form; +expression:alias = number | string | symbol | list; +scheme = wsc* (expression wsc*)*; diff --git a/scheme_test.go b/scheme_test.go new file mode 100644 index 0000000..1946466 --- /dev/null +++ b/scheme_test.go @@ -0,0 +1,84 @@ +package parse + +import "testing" + +func TestScheme(t *testing.T) { + test(t, "scheme.p", "scheme", []testItem{{ + msg: "empty", + }, { + msg: "a function", + text: ` + (define (foo a b c) + (let ([bar (+ a b c)] + [baz (- a b c)]) + (* bar baz))) + `, + nodes: []*Node{{ + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }}, + }, { + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "list", + Nodes: []*Node{{ + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }, { + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + }, { + Name: "list", + Nodes: []*Node{{ + Name: "symbol", + }, { + Name: "symbol", + }, { + Name: "symbol", + }}, + }}, + }}, + }}, + ignorePosition: true, + }}) +} diff --git a/sequence.go b/sequence.go new file mode 100644 index 0000000..0539e62 --- /dev/null +++ b/sequence.go @@ -0,0 +1,187 @@ +package parse + +type sequenceDefinition struct { + name string + commit CommitType + items []string +} + +type sequenceParser struct { + name string + commit CommitType + items []parser + including []parser +} + +func newSequence(name string, ct 
CommitType, items []string) *sequenceDefinition { + return &sequenceDefinition{ + name: name, + commit: ct, + items: items, + } +} + +func (d *sequenceDefinition) nodeName() string { return d.name } + +func (d *sequenceDefinition) parser(r *registry, path []string) (parser, error) { + if stringsContain(path, d.name) { + panic(errCannotIncludeParsers) + } + + p, ok := r.parser(d.name) + if ok { + return p, nil + } + + sp := &sequenceParser{ + name: d.name, + commit: d.commit, + } + + r.setParser(sp) + + var items []parser + path = append(path, d.name) + for _, name := range d.items { + item, ok := r.parser(name) + if ok { + items = append(items, item) + continue + } + + itemDefinition, ok := r.definition(name) + if !ok { + return nil, parserNotFound(name) + } + + item, err := itemDefinition.parser(r, path) + if err != nil { + return nil, err + } + + items = append(items, item) + } + + // for single items, acts like a choice + if len(items) == 1 { + items[0].setIncludedBy(sp, path) + } + + sp.items = items + return sp, nil +} + +func (d *sequenceDefinition) commitType() CommitType { + return d.commit +} + +func (p *sequenceParser) nodeName() string { return p.name } + +func (p *sequenceParser) setIncludedBy(i parser, path []string) { + if stringsContain(path, p.name) { + return + } + + p.including = append(p.including, i) +} + +func (p *sequenceParser) cacheIncluded(c *context, n *Node) { + if !c.excluded(n.from, p.name) { + return + } + + nc := newNode(p.name, p.commit, n.from, n.to) + nc.append(n) + c.cache.set(nc.from, p.name, nc) + + // maybe it is enough to cache only those that are on the path + for _, i := range p.including { + i.cacheIncluded(c, nc) + } +} + +/* +should be possible to parse: + +a = "0" +b = "1" +c = a* e b +d = a | c +e = b | d + +input: 111 +*/ + +func (p *sequenceParser) parse(t Trace, c *context) { + t = t.Extend(p.name) + t.Out1("parsing sequence", c.offset) + + if p.commit&Documentation != 0 { + t.Out1("fail, doc") + c.fail(c.offset) + 
return + } + + // TODO: maybe we can check the cache here? no because that would exclude the continuations + + if c.excluded(c.offset, p.name) { + t.Out1("excluded") + c.fail(c.offset) + return + } + + c.exclude(c.offset, p.name) + defer c.include(c.offset, p.name) + + items := p.items + node := newNode(p.name, p.commit, c.offset, c.offset) + + for len(items) > 0 { + t.Out2("next sequence item") + // n, m, ok := c.cache.get(c.offset, items[0].nodeName()) + m, ok := c.fromCache(items[0].nodeName()) + if ok { + t.Out1("sequence item found in cache, match:", m, items[0].nodeName(), c.offset) + if m { + t.Out2("sequence item from cache:", c.node.Name, len(c.node.Nodes), c.node.from) + node.append(c.node) + items = items[1:] + continue + } + + c.cache.set(node.from, p.name, nil) + c.fail(node.from) + return + } + + items[0].parse(t, c) + items = items[1:] + + if !c.match { + t.Out1("fail, item failed") + c.cache.set(node.from, p.name, nil) + c.fail(node.from) + return + } + + if c.node.tokenLength() > 0 { + t.Out2("appending sequence item", c.node.Name, len(c.node.Nodes)) + node.append(c.node) + } + } + + t.Out1("success, items parsed") + t.Out2("nodes", node.nodeLength()) + if node.Name == "group" { + t.Out2("caching group", node.from, node.Nodes[2].Name, node.Nodes[2].nodeLength()) + } + + // is this cached item ever taken? + c.cache.set(node.from, p.name, node) + for _, i := range p.including { + i.cacheIncluded(c, node) + } + + t.Out2("caching sequence and included by done") + c.success(node) +} diff --git a/sexpr.p b/sexpr.p new file mode 100644 index 0000000..5ac70c8 --- /dev/null +++ b/sexpr.p @@ -0,0 +1,9 @@ +ws:alias = [ \b\f\n\r\t\v]; +comment:alias = ";" [^\n]*; +wsc:alias = ws | comment; +number = "-"? ("0" | [1-9][0-9]*) ("." [0-9]+)? ([eE] [+\-]? 
// CommitType is a set of bit flags attached to a definition.
type CommitType int

// The flag values are spelled out explicitly. Note that Alias is 2 and not
// 1: bit 0 is not used by any of the flags.
const (
	// None is the empty flag set.
	None CommitType = 0

	// Alias is the flag for alias definitions.
	Alias CommitType = 1 << 1

	// Documentation is the flag for documentation definitions.
	Documentation CommitType = 1 << 2

	// Root is the flag for the root definition.
	Root CommitType = 1 << 3
)
ErrNoParsersDefined = errors.New("no parsers defined") + ErrInvalidInput = errors.New("invalid input") + ErrInvalidCharacter = errors.New("invalid character") // two use cases: utf8 and boot + ErrUnexpectedCharacter = errors.New("unexpected character") + ErrInvalidSyntax = errors.New("invalid syntax") + ErrRootAlias = errors.New("root node cannot be an alias") +) + +func duplicateDefinition(name string) error { + return fmt.Errorf("duplicate definition: %s", name) +} + +func NewSyntax(t Trace) *Syntax { + if t == nil { + t = NewTrace(0) + } + + return &Syntax{ + trace: t, + registry: newRegistry(), + } +} + +func (s *Syntax) register(d definition) error { + if s.initialized { + return ErrSyntaxInitialized + } + + if d.commitType()&Root != 0 { + s.root = d + s.rootSet = true + } else if !s.rootSet { + s.root = d + } + + return s.registry.setDefinition(d) +} + +func (s *Syntax) AnyChar(name string, ct CommitType) error { + return s.register(newChar(name, ct, true, false, nil, nil)) +} + +func (s *Syntax) Class(name string, ct CommitType, not bool, chars []rune, ranges [][]rune) error { + return s.register(newChar(name, ct, false, not, chars, ranges)) +} + +func childName(name string, childIndex int) string { + return fmt.Sprintf("%s:%d", name, childIndex) +} + +func (s *Syntax) CharSequence(name string, ct CommitType, chars []rune) error { + var refs []string + for i, ci := range chars { + ref := childName(name, i) + refs = append(refs, ref) + if err := s.register(newChar(ref, Alias, false, false, []rune{ci}, nil)); err != nil { + return err + } + } + + return s.Sequence(name, ct, refs...) 
+} + +func (s *Syntax) Quantifier(name string, ct CommitType, item string, min, max int) error { + return s.register(newQuantifier(name, ct, item, min, max)) +} + +func (s *Syntax) Sequence(name string, ct CommitType, items ...string) error { + return s.register(newSequence(name, ct, items)) +} + +func (s *Syntax) Choice(name string, ct CommitType, elements ...string) error { + return s.register(newChoice(name, ct, elements)) +} + +func (s *Syntax) Read(r io.Reader) error { + if s.initialized { + return ErrSyntaxInitialized + } + + return nil +} + +func (s *Syntax) Init() error { + if s.initFailed { + return ErrInitFailed + } + + if s.initialized { + return nil + } + + if s.root == nil { + return ErrNoParsersDefined + } + + if s.root.commitType()&Alias != 0 { + return ErrRootAlias + } + + var err error + s.parser, err = s.root.parser(s.registry, nil) + if err != nil { + s.initFailed = true + return err + } + + s.initialized = true + return nil +} + +func (s *Syntax) Generate(w io.Writer) error { + if err := s.Init(); err != nil { + return err + } + + return nil +} + +func (s *Syntax) Parse(r io.Reader) (*Node, error) { + if err := s.Init(); err != nil { + return nil, err + } + + c := newContext(bufio.NewReader(r)) + return parse(s.trace, s.parser, c) +} diff --git a/syntax.p b/syntax.p new file mode 100644 index 0000000..87a483a --- /dev/null +++ b/syntax.p @@ -0,0 +1,78 @@ +ws:alias = " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v"; +wsc:alias = ws | comment; + +block-comment:alias = "/*" ("*" [^/] | [^*])* "*/"; +line-comment:alias = "//" [^\n]*; +comment-segment:alias = line-comment | block-comment; +ws-no-nl:alias = " " | "\t" | "\b" | "\f" | "\r" | "\v"; +comment = comment-segment (ws-no-nl* "\n"? ws-no-nl* comment-segment)*; + +any-char = "."; // equivalent to [^] + +// TODO: document matching terminal: [] + +// TODO: handle char class equivalences + +// TODO: enable streaming + +// TODO: set route function in generated code? 
+ +// caution: newline is accepted +class-not = "^"; +class-char = [^\\\[\]\^\-] | "\\" .; +char-range = class-char "-" class-char; +char-class = "[" class-not? (class-char | char-range)* "]"; + +// caution: newline is accepted +sequence-char = [^\\"] | "\\" .; +char-sequence = "\"" sequence-char* "\""; + +// TODO: this can be mixed up with sequence. Is it fine? fix this, see mml symbol +terminal:alias = any-char | char-class | char-sequence; + +symbol = [^\\ \n\t\b\f\r\v/.\[\]\"{}\^+*?|():=;]+; + +group:alias = "(" wsc* expression wsc* ")"; + +number:alias = [0-9]+; +count = number; +count-quantifier = "{" wsc* count wsc* "}"; +range-from = number; +range-to = number; +range-quantifier = "{" wsc* range-from? wsc* "," wsc* range-to? wsc* "}"; +one-or-more = "+"; +zero-or-more = "*"; +zero-or-one = "?"; +quantity:alias = count-quantifier + | range-quantifier + | one-or-more + | zero-or-more + | zero-or-one; + +quantifier = (terminal | symbol | group) wsc* quantity; + +item:alias = terminal | symbol | group | quantifier; +sequence = item (wsc* item)+; + +element:alias = terminal | symbol | group | quantifier | sequence; + +// DOC: once cached, doesn't try again, even in a new context, therefore the order may matter +choice = element (wsc* "|" wsc* element)+; + +// DOC: not having 'not' needs some tricks sometimes + +expression:alias = terminal + | symbol + | group + | quantifier + | sequence + | choice; + +alias = "alias"; +doc = "doc"; +root = "root"; +flag:alias = alias | doc | root; +definition = symbol (":" flag)* wsc* "=" wsc* expression; + +definitions:alias = definition (wsc* ";" (wsc | ";")* definition)*; +syntax:root = (wsc | ";")* definitions? 
// Trace is the interface used for printing trace messages. The Out methods
// correspond to the levels 0 to 3, and Extend derives a trace for a named
// sub-step.
type Trace interface {
	Out(...interface{})
	Out1(...interface{})
	Out2(...interface{})
	Out3(...interface{})
	Extend(string) Trace
}

// DefaultTrace is a Trace printing to the standard error. Every message is
// prefixed with the path of the trace, and messages above the level of the
// trace are dropped.
type DefaultTrace struct {
	level int
	path  string
}

// NopTrace is a Trace that drops every message.
type NopTrace struct{}

// NewTrace creates a DefaultTrace with the path "/" that prints the
// messages up to, and including, level.
func NewTrace(level int) *DefaultTrace {
	return &DefaultTrace{level: level, path: "/"}
}

// printlnLevel prints the path of the trace followed by a, as a single
// line, to the standard error, when l is not above the level of the trace.
func (t *DefaultTrace) printlnLevel(l int, a ...interface{}) {
	if l <= t.level {
		line := make([]interface{}, 0, len(a)+1)
		line = append(line, t.path)
		line = append(line, a...)
		fmt.Fprintln(os.Stderr, line...)
	}
}

// Out prints a on level 0.
func (t *DefaultTrace) Out(a ...interface{}) { t.printlnLevel(0, a...) }

// Out1 prints a on level 1.
func (t *DefaultTrace) Out1(a ...interface{}) { t.printlnLevel(1, a...) }

// Out2 prints a on level 2.
func (t *DefaultTrace) Out2(a ...interface{}) { t.printlnLevel(2, a...) }

// Out3 prints a on level 3.
func (t *DefaultTrace) Out3(a ...interface{}) { t.printlnLevel(3, a...) }

// Extend returns a new trace with the same level, whose path is the path
// of t extended with name, using "/" as the separator.
func (t *DefaultTrace) Extend(name string) Trace {
	separator := "/"
	if t.path == "/" {
		// the root path already ends with the separator
		separator = ""
	}

	return &DefaultTrace{
		level: t.level,
		path:  t.path + separator + name,
	}
}

// Out does nothing.
func (NopTrace) Out(...interface{}) {}

// Out1 does nothing.
func (NopTrace) Out1(...interface{}) {}

// Out2 does nothing.
func (NopTrace) Out2(...interface{}) {}

// Out3 does nothing.
func (NopTrace) Out3(...interface{}) {}

// Extend returns the receiver unchanged.
func (t NopTrace) Extend(string) Trace { return t }