add whitespace support
This commit is contained in:
parent
99246ff28b
commit
5fd3d6b7ba
2
char.go
2
char.go
@ -24,9 +24,11 @@ func newChar(
|
||||
}
|
||||
|
||||
func (p *charParser) nodeName() string { return p.name }
|
||||
func (p *charParser) setNodeName(n string) { p.name = n }
|
||||
func (p *charParser) nodeID() int { return p.id }
|
||||
func (p *charParser) setID(id int) { p.id = id }
|
||||
func (p *charParser) commitType() CommitType { return Alias }
|
||||
func (p *charParser) setCommitType(ct CommitType) {}
|
||||
func (p *charParser) validate(*registry, *idSet) error { return nil }
|
||||
func (p *charParser) normalize(*registry, *idSet) error { return nil }
|
||||
|
||||
|
@ -34,9 +34,11 @@ func newChoice(name string, ct CommitType, elements []string) *choiceDefinition
|
||||
}
|
||||
|
||||
func (d *choiceDefinition) nodeName() string { return d.name }
|
||||
func (d *choiceDefinition) setNodeName(n string) { d.name = n }
|
||||
func (d *choiceDefinition) nodeID() int { return d.id }
|
||||
func (d *choiceDefinition) setID(id int) { d.id = id }
|
||||
func (d *choiceDefinition) commitType() CommitType { return d.commit }
|
||||
func (d *choiceDefinition) setCommitType(ct CommitType) { d.commit = ct }
|
||||
|
||||
func (d *choiceDefinition) validate(r *registry, path *idSet) error {
|
||||
for i := range d.elements {
|
||||
|
@ -83,6 +83,10 @@ func flagsToCommitType(n []*Node) CommitType {
|
||||
switch ni.Name {
|
||||
case "alias":
|
||||
ct |= Alias
|
||||
case "ws":
|
||||
ct |= Whitespace
|
||||
case "nows":
|
||||
ct |= NoWhitespace
|
||||
case "doc":
|
||||
ct |= Documentation
|
||||
case "root":
|
||||
|
18
notes.txt
18
notes.txt
@ -1,14 +1,3 @@
|
||||
[whitespace]
|
||||
1. merge whitespaces
|
||||
2. set ws to alias
|
||||
3. apply whitespace to expressions
|
||||
- a a -> a ws* a
|
||||
- a | b -> a | b
|
||||
- a? -> a{0, 1} -> a{0, 1}
|
||||
- a+ -> a{1,} -> a (ws* a){,}
|
||||
- a* -> a{0,} -> (a (ws* a){,}){,}
|
||||
- root -> ws* root ws*
|
||||
|
||||
error reporting
|
||||
- longest parse
|
||||
- count the lines
|
||||
@ -19,9 +8,10 @@ read, with error reporting
|
||||
what was the bug with the large json from eskip?
|
||||
|
||||
[next]
|
||||
optimization
|
||||
why normalization failed
|
||||
why was normalization slower?
|
||||
missing tests, coverage:
|
||||
- validation
|
||||
- error cases
|
||||
- whitespace cases
|
||||
error reporting
|
||||
coverage
|
||||
custom tokens
|
||||
|
2
parse.go
2
parse.go
@ -4,8 +4,10 @@ import "fmt"
|
||||
|
||||
type definition interface {
|
||||
nodeName() string
|
||||
setNodeName(string)
|
||||
nodeID() int
|
||||
commitType() CommitType
|
||||
setCommitType(CommitType)
|
||||
setID(int)
|
||||
validate(*registry, *idSet) error
|
||||
normalize(*registry, *idSet) error
|
||||
|
@ -40,9 +40,11 @@ func newSequence(name string, ct CommitType, items []SequenceItem) *sequenceDefi
|
||||
}
|
||||
|
||||
func (d *sequenceDefinition) nodeName() string { return d.name }
|
||||
func (d *sequenceDefinition) setNodeName(n string) { d.name = n }
|
||||
func (d *sequenceDefinition) nodeID() int { return d.id }
|
||||
func (d *sequenceDefinition) setID(id int) { d.id = id }
|
||||
func (d *sequenceDefinition) commitType() CommitType { return d.commit }
|
||||
func (d *sequenceDefinition) setCommitType(ct CommitType) { d.commit = ct }
|
||||
|
||||
func (d *sequenceDefinition) validate(r *registry, path *idSet) error {
|
||||
for i := range d.items {
|
||||
@ -153,7 +155,6 @@ func (d *sequenceDefinition) setIncludedBy(r *registry, includedBy int, parsers
|
||||
}
|
||||
|
||||
func (d *sequenceDefinition) parser(r *registry, parsers *idSet) (parser, error) {
|
||||
// TODO: what is this for? test with sequence containing a sequence through a choice
|
||||
if parsers.has(d.id) {
|
||||
panic(cannotIncludeParsers(d.name))
|
||||
}
|
||||
|
36
syntax.go
36
syntax.go
@ -12,6 +12,8 @@ type CommitType int
|
||||
const (
|
||||
None CommitType = 0
|
||||
Alias CommitType = 1 << iota
|
||||
Whitespace
|
||||
NoWhitespace
|
||||
Documentation
|
||||
Root
|
||||
)
|
||||
@ -42,7 +44,9 @@ var (
|
||||
ErrUnexpectedCharacter = errors.New("unexpected character")
|
||||
ErrInvalidSyntax = errors.New("invalid syntax")
|
||||
ErrRootAlias = errors.New("root node cannot be an alias")
|
||||
ErrRootWhitespace = errors.New("root node cannot be a whitespace")
|
||||
ErrNotImplemented = errors.New("not implemented")
|
||||
ErrMultipleRoots = errors.New("multiple roots")
|
||||
)
|
||||
|
||||
func duplicateDefinition(name string) error {
|
||||
@ -70,12 +74,29 @@ func (s *Syntax) register(d definition) error {
|
||||
}
|
||||
|
||||
if d.commitType()&Root != 0 {
|
||||
if s.explicitRoot {
|
||||
return ErrMultipleRoots
|
||||
}
|
||||
|
||||
if s.root != nil {
|
||||
s.root.setCommitType(s.root.commitType() &^ Root)
|
||||
}
|
||||
|
||||
s.root = d
|
||||
s.root.setCommitType(s.root.commitType() | Root)
|
||||
s.explicitRoot = true
|
||||
} else if !s.explicitRoot {
|
||||
s.root = d
|
||||
if s.root != nil {
|
||||
s.root.setCommitType(s.root.commitType() &^ Root)
|
||||
}
|
||||
|
||||
s.root = d
|
||||
s.root.setCommitType(s.root.commitType() | Root)
|
||||
}
|
||||
|
||||
// TODO: verify that definition names match the symbol criteria, or figure a better naming for the
|
||||
// whitespace
|
||||
|
||||
return s.registry.setDefinition(d)
|
||||
}
|
||||
|
||||
@ -144,6 +165,19 @@ func (s *Syntax) Init() error {
|
||||
return ErrRootAlias
|
||||
}
|
||||
|
||||
if s.root.commitType()&Whitespace != 0 {
|
||||
return ErrRootWhitespace
|
||||
}
|
||||
|
||||
s.registry = initWhitespace(s.registry)
|
||||
|
||||
for _, def := range s.registry.definitions {
|
||||
if def.commitType()&Root != 0 {
|
||||
s.root = def
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if err := s.root.validate(s.registry, &idSet{}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
ws:alias = " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v";
|
||||
wsc:alias = ws | comment;
|
||||
wschar:alias = " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v";
|
||||
wsc:alias = wschar | comment;
|
||||
|
||||
block-comment:alias = "/*" ("*" [^/] | [^*])* "*/";
|
||||
line-comment:alias = "//" [^\n]*;
|
||||
@ -57,9 +57,11 @@ expression:alias = terminal
|
||||
| choice;
|
||||
|
||||
alias = "alias";
|
||||
ws = "ws";
|
||||
nows = "nows";
|
||||
doc = "doc";
|
||||
root = "root";
|
||||
flag:alias = alias | doc | root;
|
||||
flag:alias = alias | ws | nows | doc | root;
|
||||
definition = symbol (":" flag)* wsc* "=" wsc* expression;
|
||||
|
||||
definitions:alias = definition (wsc* ";" (wsc | ";")* definition)*;
|
||||
|
172
whitespace.go
Normal file
172
whitespace.go
Normal file
@ -0,0 +1,172 @@
|
||||
package treerack
|
||||
|
||||
import "fmt"
|
||||
|
||||
const whitespaceName = ":ws"
|
||||
|
||||
// brokenRegistryError decorates err with a "broken registry" prefix. It is
// used to report a failure to store a patched definition in a registry.
func brokenRegistryError(err error) error {
	const format = "broken registry: %v"
	return fmt.Errorf(format, err)
}
|
||||
|
||||
func splitWhitespaceDefs(all map[string]definition) ([]definition, []definition) {
|
||||
var whitespaceDefs, nonWhitespaceDefs []definition
|
||||
for _, def := range all {
|
||||
if def.commitType()&Whitespace != 0 {
|
||||
def.setCommitType(def.commitType() | Alias)
|
||||
whitespaceDefs = append(whitespaceDefs, def)
|
||||
continue
|
||||
}
|
||||
|
||||
nonWhitespaceDefs = append(nonWhitespaceDefs, def)
|
||||
}
|
||||
|
||||
return whitespaceDefs, nonWhitespaceDefs
|
||||
}
|
||||
|
||||
func splitRoot(defs []definition) (definition, []definition) {
|
||||
var (
|
||||
root definition
|
||||
rest []definition
|
||||
)
|
||||
|
||||
for _, def := range defs {
|
||||
if def.commitType()&Root != 0 {
|
||||
root = def
|
||||
continue
|
||||
}
|
||||
|
||||
rest = append(rest, def)
|
||||
}
|
||||
|
||||
return root, rest
|
||||
}
|
||||
|
||||
func mergeWhitespaceDefs(ws []definition) definition {
|
||||
var names []string
|
||||
for _, def := range ws {
|
||||
names = append(names, def.nodeName())
|
||||
}
|
||||
|
||||
return newChoice(whitespaceName, Alias, names)
|
||||
}
|
||||
|
||||
// TODO: validate min and max
|
||||
|
||||
func applyWhitespaceToSeq(s *sequenceDefinition) []definition {
|
||||
var (
|
||||
defs []definition
|
||||
items []SequenceItem
|
||||
)
|
||||
|
||||
for i, item := range s.items {
|
||||
if i > 0 {
|
||||
items = append(items, SequenceItem{Name: whitespaceName, Min: 0, Max: -1})
|
||||
}
|
||||
|
||||
if item.Max >= 0 && item.Max <= 1 {
|
||||
items = append(items, item)
|
||||
continue
|
||||
}
|
||||
|
||||
singleItem := SequenceItem{Name: item.Name, Min: 1, Max: 1}
|
||||
|
||||
restName := item.Name + ":wsrest"
|
||||
restDef := newSequence(restName, Alias, []SequenceItem{{Name: whitespaceName, Min: 0, Max: -1}, singleItem})
|
||||
defs = append(defs, restDef)
|
||||
|
||||
restItems := SequenceItem{Name: restName, Min: 0, Max: -1}
|
||||
if item.Min > 0 {
|
||||
restItems.Min = item.Min - 1
|
||||
}
|
||||
if item.Max > 0 {
|
||||
restItems.Min = item.Max - 1
|
||||
}
|
||||
|
||||
if item.Min > 0 {
|
||||
items = append(items, singleItem, restItems)
|
||||
continue
|
||||
}
|
||||
|
||||
optName := item.Name + ":wsopt"
|
||||
optDef := newSequence(optName, Alias, []SequenceItem{singleItem, restItems})
|
||||
defs = append(defs, optDef)
|
||||
items = append(items, SequenceItem{Name: optName, Min: 0, Max: 1})
|
||||
}
|
||||
|
||||
s = newSequence(s.nodeName(), s.commitType(), items)
|
||||
defs = append(defs, s)
|
||||
return defs
|
||||
}
|
||||
|
||||
func applyWhitespace(defs []definition) []definition {
|
||||
var defsWS []definition
|
||||
for _, def := range defs {
|
||||
if def.commitType()&NoWhitespace != 0 {
|
||||
defsWS = append(defsWS, def)
|
||||
continue
|
||||
}
|
||||
|
||||
seq, ok := def.(*sequenceDefinition)
|
||||
if !ok {
|
||||
defsWS = append(defsWS, def)
|
||||
continue
|
||||
}
|
||||
|
||||
defsWS = append(defsWS, applyWhitespaceToSeq(seq)...)
|
||||
}
|
||||
|
||||
return defsWS
|
||||
}
|
||||
|
||||
func applyWhitespaceRoot(root definition) (definition, definition) {
|
||||
original, name := root, root.nodeName()
|
||||
wsName := ":wsroot:" + name
|
||||
|
||||
original.setNodeName(wsName)
|
||||
original.setCommitType(original.commitType() &^ Root)
|
||||
original.setCommitType(original.commitType() | Alias)
|
||||
|
||||
root = newSequence(name, Root, []SequenceItem{{
|
||||
Name: whitespaceName,
|
||||
Min: 0,
|
||||
Max: -1,
|
||||
}, {
|
||||
Name: wsName,
|
||||
Min: 1,
|
||||
Max: 1,
|
||||
}, {
|
||||
Name: whitespaceName,
|
||||
Min: 0,
|
||||
Max: -1,
|
||||
}})
|
||||
|
||||
return original, root
|
||||
}
|
||||
|
||||
func registerPatched(r *registry, defs ...definition) {
|
||||
for _, def := range defs {
|
||||
if err := r.setDefinition(def); err != nil {
|
||||
panic(brokenRegistryError(err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func initWhitespace(r *registry) *registry {
|
||||
whitespaceDefs, defs := splitWhitespaceDefs(r.definitions)
|
||||
if len(whitespaceDefs) == 0 {
|
||||
return r
|
||||
}
|
||||
|
||||
whitespace := mergeWhitespaceDefs(whitespaceDefs)
|
||||
defs = applyWhitespace(defs)
|
||||
|
||||
root, defs := splitRoot(defs)
|
||||
originalRoot, root := applyWhitespaceRoot(root)
|
||||
|
||||
r = newRegistry()
|
||||
registerPatched(r, whitespace)
|
||||
registerPatched(r, whitespaceDefs...)
|
||||
registerPatched(r, defs...)
|
||||
registerPatched(r, originalRoot, root)
|
||||
return r
|
||||
}
|
@ -4,22 +4,25 @@ import "testing"
|
||||
|
||||
const (
|
||||
csvWithoutWhitespaceSupport = `
|
||||
ws:alias = [ \t]*;
|
||||
ws:alias = [ \t];
|
||||
word-char:alias = [^\n, \t];
|
||||
cell = (word-char (ws* word-char)*)?;
|
||||
rest-cell:alias = "," ws* cell;
|
||||
line = cell ws* (rest-cell (ws* rest-cell)*)?;
|
||||
rest-line:alias = "\n" ws* line;
|
||||
document = ws* (line ws* (rest-line (ws* rest-line)*)?)? ws*;
|
||||
`
|
||||
|
||||
csvWithWhitespaceSupport = `
|
||||
ws:ws = [ \t];
|
||||
cell = [^\n, \t]*;
|
||||
line = ws cell (ws "," ws cell)* ws;
|
||||
line = cell ("," cell)*;
|
||||
document = (line ("\n" line)*)?;
|
||||
`
|
||||
)
|
||||
|
||||
func TestCSVWhitespace(t *testing.T) {
|
||||
t.Run("wihout whitespace support", func(t *testing.T) {
|
||||
s, err := openSyntaxString(csvWithoutWhitespaceSupport)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
runTestsSyntax(t, s, []testItem{{
|
||||
tests := []testItem{{
|
||||
title: "empty",
|
||||
node: &Node{
|
||||
Name: "document",
|
||||
@ -135,6 +138,146 @@ func TestCSVWhitespace(t *testing.T) {
|
||||
}},
|
||||
}},
|
||||
},
|
||||
}})
|
||||
}, {
|
||||
title: "csv with tabs",
|
||||
text: "a,\tb, c",
|
||||
ignorePosition: true,
|
||||
node: &Node{
|
||||
Name: "document",
|
||||
Nodes: []*Node{{
|
||||
Name: "line",
|
||||
Nodes: []*Node{{
|
||||
Name: "cell",
|
||||
}, {
|
||||
Name: "cell",
|
||||
}, {
|
||||
Name: "cell",
|
||||
}},
|
||||
}},
|
||||
},
|
||||
}, {
|
||||
title: "whitespace between lines",
|
||||
text: " a, b, c \n d, e, f ",
|
||||
node: &Node{
|
||||
Name: "document",
|
||||
To: 19,
|
||||
Nodes: []*Node{{
|
||||
Name: "line",
|
||||
From: 1,
|
||||
To: 8,
|
||||
Nodes: []*Node{{
|
||||
Name: "cell",
|
||||
From: 1,
|
||||
To: 2,
|
||||
}, {
|
||||
Name: "cell",
|
||||
From: 4,
|
||||
To: 5,
|
||||
}, {
|
||||
Name: "cell",
|
||||
From: 7,
|
||||
To: 8,
|
||||
}},
|
||||
}, {
|
||||
Name: "line",
|
||||
From: 11,
|
||||
To: 18,
|
||||
Nodes: []*Node{{
|
||||
Name: "cell",
|
||||
From: 11,
|
||||
To: 12,
|
||||
}, {
|
||||
Name: "cell",
|
||||
From: 14,
|
||||
To: 15,
|
||||
}, {
|
||||
Name: "cell",
|
||||
From: 17,
|
||||
To: 18,
|
||||
}},
|
||||
}},
|
||||
},
|
||||
}, {
|
||||
title: "just a space",
|
||||
text: " ",
|
||||
ignorePosition: true,
|
||||
node: &Node{
|
||||
Name: "document",
|
||||
},
|
||||
}, {
|
||||
title: "cell with spaces in it",
|
||||
text: "cell content 1/1, cell content 1/2\ncell content 2/1, cell content 2/2",
|
||||
node: &Node{
|
||||
Name: "document",
|
||||
To: 69,
|
||||
Nodes: []*Node{{
|
||||
Name: "line",
|
||||
To: 34,
|
||||
Nodes: []*Node{{
|
||||
Name: "cell",
|
||||
To: 16,
|
||||
}, {
|
||||
Name: "cell",
|
||||
From: 18,
|
||||
To: 34,
|
||||
}},
|
||||
}, {
|
||||
Name: "line",
|
||||
From: 35,
|
||||
To: 69,
|
||||
Nodes: []*Node{{
|
||||
Name: "cell",
|
||||
From: 35,
|
||||
To: 51,
|
||||
}, {
|
||||
Name: "cell",
|
||||
From: 53,
|
||||
To: 69,
|
||||
}},
|
||||
}},
|
||||
},
|
||||
}, {
|
||||
title: "multiple empty lines",
|
||||
text: "\n\n",
|
||||
ignorePosition: true,
|
||||
node: &Node{
|
||||
Name: "document",
|
||||
Nodes: []*Node{{
|
||||
Name: "line",
|
||||
Nodes: []*Node{{
|
||||
Name: "cell",
|
||||
}},
|
||||
}, {
|
||||
Name: "line",
|
||||
Nodes: []*Node{{
|
||||
Name: "cell",
|
||||
}},
|
||||
}, {
|
||||
Name: "line",
|
||||
Nodes: []*Node{{
|
||||
Name: "cell",
|
||||
}},
|
||||
}},
|
||||
},
|
||||
}}
|
||||
|
||||
t.Run("without whitespace support", func(t *testing.T) {
|
||||
s, err := openSyntaxString(csvWithoutWhitespaceSupport)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
runTestsSyntax(t, s, tests)
|
||||
})
|
||||
|
||||
t.Run("with whitespace support", func(t *testing.T) {
|
||||
s, err := openSyntaxString(csvWithWhitespaceSupport)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
runTestsSyntax(t, s, tests)
|
||||
})
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user