add formatting for definitions

This commit is contained in:
Arpad Ryszka 2017-11-26 01:49:22 +01:00
parent 0837802209
commit 78d08abb78
9 changed files with 449 additions and 21 deletions

37
char.go
View File

@ -1,5 +1,10 @@
package treerack
// Escaping configuration used when character definitions are formatted.
const (
	// charClassEscape is the rune written in front of an escaped character.
	charClassEscape = '\\'

	// charClassBanned lists the runes that get escaped when formatted: the
	// escape rune itself, the class delimiters and operators ('[', ']', '^',
	// '-') and the whitespace/control characters \b \f \n \r \t \v.
	charClassBanned = "\\[]^-\b\f\n\r\t\v"
)
type charParser struct {
name string
id int
@ -36,6 +41,38 @@ func (p *charParser) addGeneralization(int) {}
// parser returns the receiver itself as the parser.
func (p *charParser) parser() parser { return p }
// builder returns the receiver itself as the builder.
func (p *charParser) builder() builder { return p }
// isSingleChar reports whether the parser stands for exactly one literal
// character: it is not negated, it has no ranges and it lists a single char.
func (p *charParser) isSingleChar() bool {
	if p.not || len(p.ranges) != 0 {
		return false
	}

	return len(p.chars) == 1
}
// format renders the character parser in the syntax notation.
//
// A negated parser without any chars or ranges is written as ".". Every
// other parser is written as a bracketed class: an optional '^' when
// negated, then the individual chars, then the ranges as "from-to". Runes
// listed in charClassBanned are escaped with charClassEscape.
//
// The registry and the format flags are not used by this implementation.
func (p *charParser) format(_ *registry, f formatFlags) string {
	if p.not && len(p.chars)+len(p.ranges) == 0 {
		return "."
	}

	banned := []rune(charClassBanned)

	out := []rune{'['}
	if p.not {
		out = append(out, '^')
	}

	out = append(out, escape(charClassEscape, banned, p.chars)...)
	for _, rng := range p.ranges {
		out = append(out, escape(charClassEscape, banned, []rune{rng[0]})...)
		out = append(out, '-')
		out = append(out, escape(charClassEscape, banned, []rune{rng[1]})...)
	}

	return string(append(out, ']'))
}
func matchChars(chars []rune, ranges [][]rune, not bool, char rune) bool {
for _, ci := range chars {
if ci == char {

View File

@ -131,6 +131,24 @@ func (d *choiceDefinition) parser() parser {
// builder returns the builder stored in the definition's cbuilder field.
func (d *choiceDefinition) builder() builder { return d.cbuilder }
// format renders the choice in the syntax notation: its options joined
// with " | ". An option whose commit type carries the userDefined flag is
// written by its name; any other option is written by its own formatted
// form.
func (d *choiceDefinition) format(r *registry, f formatFlags) string {
	var out []rune
	for i, name := range d.options {
		if i > 0 {
			out = append(out, ' ', '|', ' ')
		}

		option, _ := r.definition(name)

		var text string
		if option.commitType()&userDefined == 0 {
			text = option.format(r, f)
		} else {
			text = option.nodeName()
		}

		out = append(out, []rune(text)...)
	}

	return string(out)
}
// nodeName returns the name of the parser.
func (p *choiceParser) nodeName() string { return p.name }
// nodeID returns the id of the parser.
func (p *choiceParser) nodeID() int { return p.id }

View File

@ -191,7 +191,7 @@ func defineDefinition(s *Syntax, n *Node) error {
return defineExpression(
s,
n.Nodes[0].Text(),
flagsToCommitType(n.Nodes[1:len(n.Nodes)-1]),
flagsToCommitType(n.Nodes[1:len(n.Nodes)-1])|userDefined,
n.Nodes[len(n.Nodes)-1],
)
}

View File

@ -10,18 +10,51 @@ func runesContain(rs []rune, r rune) bool {
return false
}
// escapeChar returns the two-rune escaped form of c: the escape rune
// followed by the mnemonic letter for \b, \f, \n, \r, \t and \v, or by c
// itself for any other rune.
func escapeChar(escape, c rune) []rune {
	symbol := c
	switch c {
	case '\b':
		symbol = 'b'
	case '\f':
		symbol = 'f'
	case '\n':
		symbol = 'n'
	case '\r':
		symbol = 'r'
	case '\t':
		symbol = 't'
	case '\v':
		symbol = 'v'
	}

	return []rune{escape, symbol}
}
// escape returns a copy of chars in which every rune that appears in
// banned is replaced by its escaped form (see escapeChar); all other runes
// are copied unchanged. The result is nil when chars is empty.
func escape(escape rune, banned, chars []rune) []rune {
	var out []rune
	for _, c := range chars {
		if !runesContain(banned, c) {
			out = append(out, c)
			continue
		}

		out = append(out, escapeChar(escape, c)...)
	}

	return out
}
func unescapeChar(c rune) rune {
switch c {
case 'n':
return '\n'
case 't':
return '\t'
case 'b':
return '\b'
case 'f':
return '\f'
case 'n':
return '\n'
case 'r':
return '\r'
case 't':
return '\t'
case 'v':
return '\v'
default:
@ -29,7 +62,7 @@ func unescapeChar(c rune) rune {
}
}
func unescape(escape rune, banned []rune, chars []rune) ([]rune, error) {
func unescape(escape rune, banned, chars []rune) ([]rune, error) {
var (
unescaped []rune
escaped bool

View File

@ -27,3 +27,16 @@ func TestUnescape(t *testing.T) {
}
})
}
// TestEscape checks that the banned whitespace/control characters are
// written with their mnemonic escape sequences.
func TestEscape(t *testing.T) {
	const (
		banned    = "\b\f\n\r\t\v"
		unescaped = "\b\f\n\r\t\v"
		expected  = "\\b\\f\\n\\r\\t\\v"
	)

	if got := string(escape('\\', []rune(banned), []rune(unescaped))); got != expected {
		t.Error("failed to escape", got, expected)
	}
}

194
format_test.go Normal file
View File

@ -0,0 +1,194 @@
package treerack
import (
"fmt"
"testing"
)
// TestCharFormat parses single character class definitions and checks the
// text the formatter produces for the first child of the "def" definition.
func TestCharFormat(t *testing.T) {
	// Fields are, in order: title, definition, expected output.
	type formatCase struct {
		title      string
		definition string
		output     string
	}

	cases := []formatCase{
		{"empty", "[]", "[]"},
		{"one char", "[a]", "[a]"},
		{"escaped char", "[\\a]", "[a]"},
		{"escaped control char", "[\\^]", "[\\^]"},
		{"escaped whitespace char", "[\\n]", "[\\n]"},
		{"escaped verbatim whitespace char", "[\n]", "[\\n]"},
		{"escaped range", "[\\b-\\v]", "[\\b-\\v]"},
		{"anything", ".", "."},
		{"not something", "[^abc]", "[^abc]"},
		{"range", "[a-z]", "[a-z]"},
		{"range and char mixed", "[a-z_\\-A-Z]", "[_\\-a-zA-Z]"},
	}

	for _, tc := range cases {
		t.Run(tc.title, func(t *testing.T) {
			s, err := openSyntaxString(fmt.Sprintf("def = %s", tc.definition))
			if err != nil {
				t.Fatal(err)
			}

			def, found := s.registry.definition(childName("def", 0))
			if !found {
				t.Fatal("invalid syntax")
			}

			if got := def.format(s.registry, formatNone); got != tc.output {
				t.Error("invalid output", got, tc.output)
			}
		})
	}
}
// TestSequenceFormat parses complete syntax documents and checks the text
// the formatter produces for the root definition when it is a sequence.
func TestSequenceFormat(t *testing.T) {
	// Fields are, in order: title, syntax, expected output.
	type formatCase struct {
		title  string
		syntax string
		output string
	}

	cases := []formatCase{
		{"empty char sequence", `def = ""`, `""`},
		{"char sequence", `def = "abc"`, `"abc"`},
		{"char sequence, escaped", `def = "\\n"`, `"\\n"`},
		{"chars", `def = "abc" [a-z]`, `"abc" [a-z]`},
		{"quantifiers, 0-or-more", `def = "a"*`, `"a"*`},
		{"quantifiers, 1-or-more", `def = "a"+`, `"a"+`},
		{"quantifiers, 0-or-one", `def = "a"?`, `"a"?`},
		{"quantifiers, exact number", `def = "a"{3}`, `"a"{3}`},
		{"quantifiers, max", `def = "a"{0, 3}`, `"a"{,3}`},
		{"quantifiers, min", `def = "a"{3,}`, `"a"{3,}`},
		{"quantifiers, range", `def = "a"{3, 9}`, `"a"{3,9}`},
		{"symbols", `a = "a"; b = "b"; c = "c"; def = a b c`, "a b c"},
		{"choice in sequence", `def = "a" ("b" | "c")`, `"a" ("b" | "c")`},
		{"grouped quantifier", `def = ("a" "b"){3}`, `("a" "b"){3}`},
	}

	for _, tc := range cases {
		t.Run(tc.title, func(t *testing.T) {
			s, err := openSyntaxString(tc.syntax)
			if err != nil {
				t.Fatal(err)
			}

			if got := s.root.format(s.registry, formatNone); got != tc.output {
				t.Error("invalid output", got, tc.output)
			}
		})
	}
}
// TestChoiceFormat parses complete syntax documents and checks the text
// the formatter produces for the root definition when it is a choice.
func TestChoiceFormat(t *testing.T) {
	// Fields are, in order: title, syntax, expected output.
	type formatCase struct {
		title  string
		syntax string
		output string
	}

	cases := []formatCase{
		{"choice of char sequences", `def = "a" | "b" | "c"`, `"a" | "b" | "c"`},
		{"choice of inline sequences", `def = "a" "b" | "c" "d" | "e" "f"`, `"a" "b" | "c" "d" | "e" "f"`},
		{"choice of symbol", `a = "a"; b = "b"; c = "c"; def = a | b | c`, "a | b | c"},
	}

	for _, tc := range cases {
		t.Run(tc.title, func(t *testing.T) {
			s, err := openSyntaxString(tc.syntax)
			if err != nil {
				t.Fatal(err)
			}

			if got := s.root.format(s.registry, formatNone); got != tc.output {
				t.Error("invalid output", got, tc.output)
			}
		})
	}
}

View File

@ -25,6 +25,9 @@ code generation js
documentation flag
support custom tokenization
streaming
verify choice and sequence preference
formatter
pretty
[problems]
can the root be an alias? check the commit mechanism

View File

@ -1,9 +1,12 @@
package treerack
import "strconv"
type sequenceDefinition struct {
name string
id int
commit CommitType
originalItems []SequenceItem
items []SequenceItem
itemDefs []definition
ranges [][]int
@ -35,10 +38,16 @@ type sequenceBuilder struct {
}
// newSequence creates a sequence definition with the given name, commit
// type and items. A separate copy of the items is stored in originalItems,
// so the items as they were passed in stay available even when the entries
// of the items slice are overwritten later.
func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefinition {
	snapshot := make([]SequenceItem, len(items))
	copy(snapshot, items)

	return &sequenceDefinition{
		name:          name,
		commit:        ct,
		originalItems: snapshot,
		items:         items,
	}
}
@ -49,11 +58,12 @@ func (d *sequenceDefinition) setID(id int) { d.id = id }
// commitType returns the commit type stored in the definition.
func (d *sequenceDefinition) commitType() CommitType { return d.commit }
// setCommitType replaces the commit type stored in the definition.
func (d *sequenceDefinition) setCommitType(ct CommitType) { d.commit = ct }
func (d *sequenceDefinition) initRanges() {
for i, item := range d.items {
func normalizeItemRange(item SequenceItem) SequenceItem {
if item.Min == 0 && item.Max == 0 {
item.Min, item.Max = 1, 1
} else {
return item
}
if item.Min <= 0 {
item.Min = 0
}
@ -61,8 +71,13 @@ func (d *sequenceDefinition) initRanges() {
if item.Max <= 0 {
item.Max = -1
}
}
return item
}
func (d *sequenceDefinition) initRanges() {
for i, item := range d.items {
item = normalizeItemRange(item)
d.items[i] = item
d.ranges = append(d.ranges, []int{item.Min, item.Max})
}
@ -168,6 +183,111 @@ func (d *sequenceDefinition) parser() parser {
// builder returns the builder stored in the definition's sbuilder field.
func (d *sequenceDefinition) builder() builder { return d.sbuilder }
// isCharSequence reports whether the sequence consists only of single
// literal characters, each occurring exactly once. It looks at the original
// items: every one must normalize to the range 1..1 and must resolve in the
// registry to a *charParser for which isSingleChar is true. A sequence
// without items counts as a char sequence.
func (d *sequenceDefinition) isCharSequence(r *registry) bool {
	for _, original := range d.originalItems {
		if n := normalizeItemRange(original); n.Min != 1 || n.Max != 1 {
			return false
		}

		def, _ := r.definition(original.Name)
		if c, isChar := def.(*charParser); !isChar || !c.isSingleChar() {
			return false
		}
	}

	return true
}
// format renders the sequence in the syntax notation.
//
// A sequence for which isCharSequence is true is written as a double-quoted
// string literal. Any other sequence is written as its original items
// separated by single spaces, where each item is:
//
//   - the name of the referenced definition, when its commit type carries
//     the userDefined flag; otherwise the formatted form of the definition,
//     wrapped in parentheses when it is a choice of several options or a
//     sequence of several items that is not a char sequence;
//   - followed by a quantifier when the normalized range is not 1..1:
//     "?", "*", "+", "{n}", "{min,}", "{,max}" or "{min,max}".
//
// The format flags are passed through unchanged to the nested definitions.
func (d *sequenceDefinition) format(r *registry, f formatFlags) string {
	if d.isCharSequence(r) {
		chars := make([]rune, 0, len(d.originalItems))
		for i := range d.originalItems {
			// isCharSequence has verified that every item resolves to a
			// *charParser holding exactly one char, so the lookups cannot
			// fail here.
			itemDef, _ := r.definition(d.originalItems[i].Name)
			c, _ := itemDef.(*charParser)
			chars = append(chars, c.chars[0])
		}

		// The literal is delimited by double quotes, so a quote inside the
		// sequence has to be escaped as well; otherwise the output would
		// end at that quote when read back. The rest of the banned set is
		// kept as it was to leave the existing output unchanged.
		banned := append([]rune(charClassBanned), '"')
		chars = escape(charClassEscape, banned, chars)
		return string(append([]rune{'"'}, append(chars, '"')...))
	}

	var chars []rune
	for i := range d.originalItems {
		if len(chars) > 0 {
			chars = append(chars, ' ')
		}

		item := normalizeItemRange(d.originalItems[i])
		itemDef, _ := r.definition(item.Name)

		ch, isChoice := itemDef.(*choiceDefinition)
		isChoiceOfMultiple := isChoice && len(ch.options) > 1

		seq, isSequence := itemDef.(*sequenceDefinition)
		isSequenceOfMultiple := isSequence && len(seq.originalItems) > 1 && !seq.isCharSequence(r)

		// Without the parentheses, a following quantifier or the
		// neighboring items would bind to only a part of the nested
		// expression.
		needsGrouping := isChoiceOfMultiple || isSequenceOfMultiple

		if itemDef.commitType()&userDefined != 0 {
			chars = append(chars, []rune(itemDef.nodeName())...)
		} else {
			if needsGrouping {
				chars = append(chars, '(')
			}

			chars = append(chars, []rune(itemDef.format(r, f))...)
			if needsGrouping {
				chars = append(chars, ')')
			}
		}

		switch {
		case item.Min == 1 && item.Max == 1:
			// exactly once: no quantifier
		case item.Min == 0 && item.Max == 1:
			chars = append(chars, '?')
		case item.Min == 0 && item.Max < 0:
			chars = append(chars, '*')
		case item.Min == 1 && item.Max < 0:
			chars = append(chars, '+')
		case item.Min == item.Max:
			chars = append(chars, '{')
			chars = append(chars, []rune(strconv.Itoa(item.Min))...)
			chars = append(chars, '}')
		default:
			// A zero minimum and a negative (unbounded) maximum are left
			// out, giving "{,max}" and "{min,}".
			chars = append(chars, '{')
			if item.Min > 0 {
				chars = append(chars, []rune(strconv.Itoa(item.Min))...)
			}

			chars = append(chars, ',')
			if item.Max >= 0 {
				chars = append(chars, []rune(strconv.Itoa(item.Max))...)
			}

			chars = append(chars, '}')
		}
	}

	return string(chars)
}
// nodeName returns the name of the parser.
func (p *sequenceParser) nodeName() string { return p.name }
// nodeID returns the id of the parser.
func (p *sequenceParser) nodeID() int { return p.id }

View File

@ -15,6 +15,15 @@ const (
Whitespace
NoWhitespace
Root
userDefined
)
// formatFlags is the option value passed to the format method of the
// definitions.
type formatFlags int

const (
	// formatNone is the zero value: no formatting option is set.
	formatNone formatFlags = 0

	// formatPretty is presumably meant to request pretty-printed output;
	// its use is not visible here (TODO confirm).
	// NOTE(review): iota is 1 on this line, because it is the second spec
	// of the const block, so the value is 1 << 1 == 2 and not 1.
	formatPretty formatFlags = 1 << iota
)
// If Min == 0 && Max == 0, it means Min = 1 and Max = 1 (exactly one occurrence).
@ -48,6 +57,7 @@ type definition interface {
addGeneralization(int)
parser() parser
builder() builder
format(*registry, formatFlags) string
}
type parser interface {