From 78d08abb78c5af6bad9a56a3635c0158c57eb2ee Mon Sep 17 00:00:00 2001
From: Arpad Ryszka
Date: Sun, 26 Nov 2017 01:49:22 +0100
Subject: [PATCH] add formatting for definitions

---
 char.go                            |  47 ++++++
 choice.go                          |  26 +++
 define.go                          |   2 +-
 unescape.go => escape.go           |  48 ++++++-
 unescape_test.go => escape_test.go |  13 ++
 format_test.go                     | 201 +++++++++++++++++++++++++++++
 notes.txt                          |   3 +
 sequence.go                        | 160 +++++++++++++++++++---
 syntax.go                          |  15 ++
 9 files changed, 494 insertions(+), 21 deletions(-)
 rename unescape.go => escape.go (57%)
 rename unescape_test.go => escape_test.go (69%)
 create mode 100644 format_test.go

diff --git a/char.go b/char.go
index e0158be..dd5dee2 100644
--- a/char.go
+++ b/char.go
@@ -1,5 +1,14 @@
 package treerack
 
+const (
+	// charClassEscape is the escape character emitted by the formatter.
+	charClassEscape = '\\'
+
+	// charClassBanned lists the characters that are written in escaped form
+	// when they appear inside a character class.
+	charClassBanned = "\\[]^-\b\f\n\r\t\v"
+)
+
 type charParser struct {
 	name string
 	id int
@@ -36,6 +45,44 @@ func (p *charParser) addGeneralization(int) {}
 func (p *charParser) parser() parser { return p }
 func (p *charParser) builder() builder { return p }
 
+// isSingleChar reports whether the parser matches exactly one literal
+// character: it is not negated, has a single char and no ranges.
+func (p *charParser) isSingleChar() bool {
+	return !p.not && len(p.chars) == 1 && len(p.ranges) == 0
+}
+
+// format renders the parser in syntax notation: "." when it is negated
+// without any chars or ranges, otherwise a character class in brackets,
+// listing the chars first and the ranges after them, with the characters
+// in charClassBanned escaped.
+func (p *charParser) format(_ *registry, _ formatFlags) string {
+	if p.not && len(p.chars) == 0 && len(p.ranges) == 0 {
+		return "."
+	}
+
+	esc := func(c ...rune) []rune {
+		return escape(charClassEscape, []rune(charClassBanned), c)
+	}
+
+	var s []rune
+	s = append(s, '[')
+
+	if p.not {
+		s = append(s, '^')
+	}
+
+	s = append(s, esc(p.chars...)...)
+
+	for i := range p.ranges {
+		s = append(s, esc(p.ranges[i][0])...)
+		s = append(s, '-')
+		s = append(s, esc(p.ranges[i][1])...)
+	}
+
+	s = append(s, ']')
+	return string(s)
+}
+
 func matchChars(chars []rune, ranges [][]rune, not bool, char rune) bool {
 	for _, ci := range chars {
 		if ci == char {
diff --git a/choice.go b/choice.go
index d195414..3610106 100644
--- a/choice.go
+++ b/choice.go
@@ -131,6 +131,32 @@ func (d *choiceDefinition) parser() parser {
 
 func (d *choiceDefinition) builder() builder { return d.cbuilder }
 
+// format renders the choice in syntax notation, joining the options with
+// " | ". Options that are user defined symbols are referenced by name, all
+// others are formatted inline.
+func (d *choiceDefinition) format(r *registry, f formatFlags) string {
+	var chars []rune
+	for i := range d.options {
+		if i > 0 {
+			chars = append(chars, []rune(" | ")...)
+		}
+
+		optionDef, ok := r.definition(d.options[i])
+		switch {
+		case !ok:
+			// unresolved reference: print the name instead of
+			// dereferencing a missing definition
+			chars = append(chars, []rune(d.options[i])...)
+		case optionDef.commitType()&userDefined != 0:
+			chars = append(chars, []rune(optionDef.nodeName())...)
+		default:
+			chars = append(chars, []rune(optionDef.format(r, f))...)
+		}
+	}
+
+	return string(chars)
+}
+
 func (p *choiceParser) nodeName() string { return p.name }
 func (p *choiceParser) nodeID() int { return p.id }
 
diff --git a/define.go b/define.go
index 2b1e7c5..6823358 100644
--- a/define.go
+++ b/define.go
@@ -191,7 +191,7 @@ func defineDefinition(s *Syntax, n *Node) error {
 	return defineExpression(
 		s,
 		n.Nodes[0].Text(),
-		flagsToCommitType(n.Nodes[1:len(n.Nodes)-1]),
+		flagsToCommitType(n.Nodes[1:len(n.Nodes)-1])|userDefined,
 		n.Nodes[len(n.Nodes)-1],
 	)
 }
diff --git a/unescape.go b/escape.go
similarity index 57%
rename from unescape.go
rename to escape.go
index f6bb163..f938526 100644
--- a/unescape.go
+++ b/escape.go
@@ -10,18 +10,56 @@ func runesContain(rs []rune, r rune) bool {
 	return false
 }
 
+// escapeChar returns the escaped form of c: the escape rune followed by the
+// mnemonic letter for \b, \f, \n, \r, \t and \v, or by c itself for any
+// other character.
+func escapeChar(escape, c rune) []rune {
+	switch c {
+	case '\b':
+		return []rune{escape, 'b'}
+	case '\f':
+		return []rune{escape, 'f'}
+	case '\n':
+		return []rune{escape, 'n'}
+	case '\r':
+		return []rune{escape, 'r'}
+	case '\t':
+		return []rune{escape, 't'}
+	case '\v':
+		return []rune{escape, 'v'}
+	default:
+		return []rune{escape, c}
+	}
+}
+
+// escape returns chars with every character contained in banned replaced
+// by its escaped form. Other characters are copied unchanged.
+func escape(escape rune, banned, chars []rune) []rune {
+	var escaped []rune
+	for i := range chars {
+		if runesContain(banned, chars[i]) {
+			escaped = append(escaped, escapeChar(escape, chars[i])...)
+			continue
+		}
+
+		escaped = append(escaped, chars[i])
+	}
+
+	return escaped
+}
+
 func unescapeChar(c rune) rune {
 	switch c {
-	case 'n':
-		return '\n'
-	case 't':
-		return '\t'
 	case 'b':
 		return '\b'
 	case 'f':
 		return '\f'
+	case 'n':
+		return '\n'
 	case 'r':
 		return '\r'
+	case 't':
+		return '\t'
 	case 'v':
 		return '\v'
 	default:
@@ -29,7 +67,7 @@ func unescapeChar(c rune) rune {
 	}
 }
 
-func unescape(escape rune, banned []rune, chars []rune) ([]rune, error) {
+func unescape(escape rune, banned, chars []rune) ([]rune, error) {
 	var (
 		unescaped []rune
 		escaped bool
diff --git a/unescape_test.go b/escape_test.go
similarity index 69%
rename from unescape_test.go
rename to escape_test.go
index 3af88c4..29f7e05 100644
--- a/unescape_test.go
+++ b/escape_test.go
@@ -27,3 +27,16 @@ func TestUnescape(t *testing.T) {
 		}
 	})
 }
+
+func TestEscape(t *testing.T) {
+	const (
+		banned    = "\b\f\n\r\t\v"
+		unescaped = "\b\f\n\r\t\v"
+		expected  = "\\b\\f\\n\\r\\t\\v"
+	)
+
+	e := escape('\\', []rune(banned), []rune(unescaped))
+	if string(e) != expected {
+		t.Error("failed to escape", string(e), expected)
+	}
+}
diff --git a/format_test.go b/format_test.go
new file mode 100644
index 0000000..2ac06a5
--- /dev/null
+++ b/format_test.go
@@ -0,0 +1,201 @@
+package treerack
+
+import (
+	"fmt"
+	"testing"
+)
+
+// TestCharFormat checks the formatting of character classes.
+func TestCharFormat(t *testing.T) {
+	type testItem struct {
+		title      string
+		definition string
+		output     string
+	}
+
+	for _, test := range []testItem{{
+		title:      "empty",
+		definition: "[]",
+		output:     "[]",
+	}, {
+		title:      "one char",
+		definition: "[a]",
+		output:     "[a]",
+	}, {
+		title:      "escaped char",
+		definition: "[\\a]",
+		output:     "[a]",
+	}, {
+		title:      "escaped control char",
+		definition: "[\\^]",
+		output:     "[\\^]",
+	}, {
+		title:      "escaped whitespace char",
+		definition: "[\\n]",
+		output:     "[\\n]",
+	}, {
+		title:      "escaped verbatim whitespace char",
+		definition: "[\n]",
+		output:     "[\\n]",
+	}, {
+		title:      "escaped range",
+		definition: "[\\b-\\v]",
+		output:     "[\\b-\\v]",
+	}, {
+		title:      "anything",
+		definition: ".",
+		output:     ".",
+	}, {
+		title:      "not something",
+		definition: "[^abc]",
+		output:     "[^abc]",
+	}, {
+		title:      "range",
+		definition: "[a-z]",
+		output:     "[a-z]",
+	}, {
+		title:      "range and char mixed",
+		definition: "[a-z_\\-A-Z]",
+		output:     "[_\\-a-zA-Z]",
+	}} {
+		t.Run(test.title, func(t *testing.T) {
+			defString := fmt.Sprintf("def = %s", test.definition)
+			s, err := openSyntaxString(defString)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			def, ok := s.registry.definition(childName("def", 0))
+			if !ok {
+				t.Fatal("invalid syntax")
+			}
+
+			output := def.format(s.registry, formatNone)
+			if output != test.output {
+				t.Error("invalid output", output, test.output)
+			}
+		})
+	}
+}
+
+// TestSequenceFormat checks the formatting of sequences and quantifiers.
+func TestSequenceFormat(t *testing.T) {
+	type testItem struct {
+		title  string
+		syntax string
+		output string
+	}
+
+	for _, test := range []testItem{{
+		title:  "empty char sequence",
+		syntax: `def = ""`,
+		output: `""`,
+	}, {
+		title:  "char sequence",
+		syntax: `def = "abc"`,
+		output: `"abc"`,
+	}, {
+		title:  "char sequence, escaped",
+		syntax: `def = "\\n"`,
+		output: `"\\n"`,
+	}, {
+		title:  "char sequence, escaped quote",
+		syntax: `def = "a\"b"`,
+		output: `"a\"b"`,
+	}, {
+		title:  "char sequence, char class controls",
+		syntax: `def = "[a-z]"`,
+		output: `"[a-z]"`,
+	}, {
+		title:  "chars",
+		syntax: `def = "abc" [a-z]`,
+		output: `"abc" [a-z]`,
+	}, {
+		title:  "quantifiers, 0-or-more",
+		syntax: `def = "a"*`,
+		output: `"a"*`,
+	}, {
+		title:  "quantifiers, 1-or-more",
+		syntax: `def = "a"+`,
+		output: `"a"+`,
+	}, {
+		title:  "quantifiers, 0-or-one",
+		syntax: `def = "a"?`,
+		output: `"a"?`,
+	}, {
+		title:  "quantifiers, exact number",
+		syntax: `def = "a"{3}`,
+		output: `"a"{3}`,
+	}, {
+		title:  "quantifiers, max",
+		syntax: `def = "a"{0, 3}`,
+		output: `"a"{,3}`,
+	}, {
+		title:  "quantifiers, min",
+		syntax: `def = "a"{3,}`,
+		output: `"a"{3,}`,
+	}, {
+		title:  "quantifiers, range",
+		syntax: `def = "a"{3, 9}`,
+		output: `"a"{3,9}`,
+	}, {
+		title:  "symbols",
+		syntax: `a = "a"; b = "b"; c = "c"; def = a b c`,
+		output: "a b c",
+	}, {
+		title:  "choice in sequence",
+		syntax: `def = "a" ("b" | "c")`,
+		output: `"a" ("b" | "c")`,
+	}, {
+		title:  "grouped quantifier",
+		syntax: `def = ("a" "b"){3}`,
+		output: `("a" "b"){3}`,
+	}} {
+		t.Run(test.title, func(t *testing.T) {
+			s, err := openSyntaxString(test.syntax)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			output := s.root.format(s.registry, formatNone)
+			if output != test.output {
+				t.Error("invalid output", output, test.output)
+			}
+		})
+	}
+}
+
+// TestChoiceFormat checks the formatting of choices.
+func TestChoiceFormat(t *testing.T) {
+	type testItem struct {
+		title  string
+		syntax string
+		output string
+	}
+
+	for _, test := range []testItem{{
+		title:  "choice of char sequences",
+		syntax: `def = "a" | "b" | "c"`,
+		output: `"a" | "b" | "c"`,
+	}, {
+		title:  "choice of inline sequences",
+		syntax: `def = "a" "b" | "c" "d" | "e" "f"`,
+		output: `"a" "b" | "c" "d" | "e" "f"`,
+	}, {
+		title:  "choice of symbol",
+		syntax: `a = "a"; b = "b"; c = "c"; def = a | b | c`,
+		output: "a | b | c",
+	}} {
+		t.Run(test.title, func(t *testing.T) {
+			s, err := openSyntaxString(test.syntax)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			output := s.root.format(s.registry, formatNone)
+			if output != test.output {
+				t.Error("invalid output", output, test.output)
+			}
+		})
+	}
+}
diff --git a/notes.txt b/notes.txt
index cae8e21..bb50ad2 100644
--- a/notes.txt
+++ b/notes.txt
@@ -25,6 +25,9 @@ code generation js
 documentation flag support
 custom tokenization
 streaming
+verify choice and sequence preference
+formatter
+pretty
 
 [problems]
 can the root be an alias? check the commit mechanism
diff --git a/sequence.go b/sequence.go
index 8287a52..b1358f3 100644
--- a/sequence.go
+++ b/sequence.go
@@ -1,9 +1,17 @@
 package treerack
 
+import "strconv"
+
+// charSequenceBanned lists the characters that are written in escaped form
+// inside a quoted char sequence: the escape character, the double quote and
+// the whitespace characters that have a mnemonic escape.
+const charSequenceBanned = "\\\"\b\f\n\r\t\v"
+
 type sequenceDefinition struct {
 	name string
 	id int
 	commit CommitType
+	originalItems []SequenceItem
 	items []SequenceItem
 	itemDefs []definition
 	ranges [][]int
@@ -35,10 +43,17 @@ type sequenceBuilder struct {
 }
 
+// newSequence creates a sequence definition from the given items. A
+// separate copy of the items is stored as originalItems, which is what
+// isCharSequence and format read.
 func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefinition {
+	original := make([]SequenceItem, len(items))
+	copy(original, items)
+
 	return &sequenceDefinition{
-		name: name,
-		commit: ct,
-		items: items,
+		name:          name,
+		commit:        ct,
+		items:         items,
+		originalItems: original,
 	}
 }
 
@@ -49,20 +64,29 @@ func (d *sequenceDefinition) setID(id int) { d.id = id }
 func (d *sequenceDefinition) commitType() CommitType { return d.commit }
 func (d *sequenceDefinition) setCommitType(ct CommitType) { d.commit = ct }
 
+// normalizeItemRange resolves the quantifier shorthands of an item: Min and
+// Max both 0 mean exactly one, a non-positive Min means 0 and a non-positive
+// Max means no upper limit, represented as -1.
+func normalizeItemRange(item SequenceItem) SequenceItem {
+	if item.Min == 0 && item.Max == 0 {
+		item.Min, item.Max = 1, 1
+		return item
+	}
+
+	if item.Min <= 0 {
+		item.Min = 0
+	}
+
+	if item.Max <= 0 {
+		item.Max = -1
+	}
+
+	return item
+}
+
 func (d *sequenceDefinition) initRanges() {
 	for i, item := range d.items {
-		if item.Min == 0 && item.Max == 0 {
-			item.Min, item.Max = 1, 1
-		} else {
-			if item.Min <= 0 {
-				item.Min = 0
-			}
-
-			if item.Max <= 0 {
-				item.Max = -1
-			}
-		}
-
+		item = normalizeItemRange(item)
 		d.items[i] = item
 		d.ranges = append(d.ranges, []int{item.Min, item.Max})
 	}
@@ -168,6 +192,112 @@ func (d *sequenceDefinition) parser() parser {
 
 func (d *sequenceDefinition) builder() builder { return d.sbuilder }
 
+// isCharSequence reports whether every item of the sequence occurs exactly
+// once and refers to a parser matching a single literal character. A
+// sequence without items qualifies, too.
+func (d *sequenceDefinition) isCharSequence(r *registry) bool {
+	for i := range d.originalItems {
+		item := normalizeItemRange(d.originalItems[i])
+		if item.Min != 1 || item.Max != 1 {
+			return false
+		}
+
+		itemDef, _ := r.definition(d.originalItems[i].Name)
+		c, ok := itemDef.(*charParser)
+		if !ok || !c.isSingleChar() {
+			return false
+		}
+	}
+
+	return true
+}
+
+// format renders the sequence in syntax notation. A sequence of single
+// characters is printed as a quoted string. Otherwise the items are
+// separated by spaces: user defined symbols are referenced by name, other
+// items are formatted inline, wrapped in parentheses when they are a choice
+// or a sequence of multiple items. Items not occurring exactly once are
+// followed by their quantifier.
+func (d *sequenceDefinition) format(r *registry, f formatFlags) string {
+	if d.isCharSequence(r) {
+		var chars []rune
+		for i := range d.originalItems {
+			itemDef, _ := r.definition(d.originalItems[i].Name)
+			c, _ := itemDef.(*charParser)
+			chars = append(chars, c.chars[0])
+		}
+
+		// the quote has to be escaped in a quoted string, otherwise it
+		// would terminate the string in the formatted output
+		chars = escape(charClassEscape, []rune(charSequenceBanned), chars)
+		return string(append([]rune{'"'}, append(chars, '"')...))
+	}
+
+	var chars []rune
+	for i := range d.originalItems {
+		if len(chars) > 0 {
+			chars = append(chars, ' ')
+		}
+
+		item := normalizeItemRange(d.originalItems[i])
+		itemDef, ok := r.definition(item.Name)
+		switch {
+		case !ok:
+			// unresolved reference: print the name instead of
+			// dereferencing a missing definition
+			chars = append(chars, []rune(item.Name)...)
+		case itemDef.commitType()&userDefined != 0:
+			chars = append(chars, []rune(itemDef.nodeName())...)
+		default:
+			ch, isChoice := itemDef.(*choiceDefinition)
+			isChoiceOfMultiple := isChoice && len(ch.options) > 1
+
+			seq, isSequence := itemDef.(*sequenceDefinition)
+			isSequenceOfMultiple := isSequence && len(seq.originalItems) > 1 && !seq.isCharSequence(r)
+
+			needsGrouping := isChoiceOfMultiple || isSequenceOfMultiple
+			if needsGrouping {
+				chars = append(chars, '(')
+			}
+
+			chars = append(chars, []rune(itemDef.format(r, f))...)
+
+			if needsGrouping {
+				chars = append(chars, ')')
+			}
+		}
+
+		switch {
+		case item.Min == 1 && item.Max == 1:
+			// exactly one: no quantifier
+		case item.Min == 0 && item.Max == 1:
+			chars = append(chars, '?')
+		case item.Min == 0 && item.Max < 0:
+			chars = append(chars, '*')
+		case item.Min == 1 && item.Max < 0:
+			chars = append(chars, '+')
+		case item.Min == item.Max:
+			chars = append(chars, '{')
+			chars = append(chars, []rune(strconv.Itoa(item.Min))...)
+			chars = append(chars, '}')
+		default:
+			chars = append(chars, '{')
+			if item.Min > 0 {
+				chars = append(chars, []rune(strconv.Itoa(item.Min))...)
+			}
+
+			chars = append(chars, ',')
+			if item.Max >= 0 {
+				chars = append(chars, []rune(strconv.Itoa(item.Max))...)
+			}
+
+			chars = append(chars, '}')
+		}
+	}
+
+	return string(chars)
+}
+
 func (p *sequenceParser) nodeName() string { return p.name }
 func (p *sequenceParser) nodeID() int { return p.id }
 
diff --git a/syntax.go b/syntax.go
index c7d933d..d51c8ec 100644
--- a/syntax.go
+++ b/syntax.go
@@ -15,6 +15,20 @@ const (
 	Whitespace
 	NoWhitespace
 	Root
+
+	// userDefined is set by defineDefinition. The format methods print
+	// references to definitions carrying it by name instead of expanding them.
+	userDefined
+)
+
+// formatFlags is the set of options passed to the format methods.
+type formatFlags int
+
+const (
+	formatNone formatFlags = 0
+
+	// iota is 1 on this line, so the value of formatPretty is 2.
+	formatPretty formatFlags = 1 << iota
 )
 
 // if min=0&&max=0, it means min=1,max=1
@@ -48,6 +62,7 @@ type definition interface {
 	addGeneralization(int)
 	parser() parser
 	builder() builder
+	format(*registry, formatFlags) string
 }
 
 type parser interface {