1
0

lib to format treerack documents

This commit is contained in:
Arpad Ryszka 2026-05-30 20:14:24 +02:00
parent 87485ffcd2
commit d9f1c70d99
13 changed files with 948 additions and 108 deletions

View File

@ -129,8 +129,7 @@ cover-cmd: .coverprofile-cmd
showcover-cmd: .coverprofile-cmd
go tool cover -html .coverprofile-cmd
.PHONY: cpu.out
cpu.out:
cpu.out: $(sources)
go test -v -run TestMMLFile -cpuprofile cpu.out
cpu: cpu.out

View File

@ -8,6 +8,7 @@ import (
const (
charClassEscape = '\\'
charClassBanned = "\\[]^-\b\f\n\r\t\v"
charSeqBanned = "\\\"\b\f\n\r\t\v"
)
func newChar(
@ -44,11 +45,15 @@ func (p *charParser) isSingleChar() bool {
return !p.not && len(p.chars) == 1 && len(p.ranges) == 0
}
func (p *charParser) format(_ *registry, f formatFlags) string {
func (p *charParser) format(_ *registry, _ formatOptions) string {
if p.not && len(p.chars) == 0 && len(p.ranges) == 0 {
return "."
}
if p.isSingleChar() {
return "\"" + string(escape(charClassEscape, []rune(charSeqBanned), p.chars)) + "\""
}
esc := func(c ...rune) []rune {
return escape(charClassEscape, []rune(charClassBanned), c)
}

View File

@ -134,24 +134,45 @@ func (d *choiceDefinition) builder() builder {
return d.cbuilder
}
func (d *choiceDefinition) format(r *registry, f formatFlags) string {
func (d *choiceDefinition) formatChoice(r *registry, o formatOptions) string {
var chars []rune
sep := []rune{' ', '|', ' '}
if o.mode&formatPretty > 0 {
sep = []rune{'\n', '|', ' '}
}
for i := range d.options {
if i > 0 {
chars = append(chars, []rune(" | ")...)
chars = append(chars, sep...)
}
optionDef := r.definition[d.options[i]]
if optionDef.commitType()&userDefined != 0 {
chars = append(chars, []rune(optionDef.nodeName())...)
} else {
chars = append(chars, []rune(optionDef.format(r, f))...)
chars = append(chars, []rune(optionDef.format(r, o))...)
}
}
return string(chars)
}
// format renders the choice as syntax source text.
//
// The choice is first rendered on a single line with the formatPretty flag
// cleared. That single-line form is returned as is when pretty mode is off or
// when it fits into o.targetWidth. Otherwise the choice is rendered again
// with the original options, which puts every option on its own line.
//
// The width is measured in runes rather than bytes, the same way
// sequenceDefinition.format measures it, so that non-ASCII terminals do not
// trigger a premature line split.
func (d *choiceDefinition) format(r *registry, o formatOptions) string {
	compact := o
	compact.mode &^= formatPretty
	oneLine := d.formatChoice(r, compact)
	if o.mode&formatPretty == 0 || len([]rune(oneLine)) <= o.targetWidth {
		return oneLine
	}

	return d.formatChoice(r, o)
}
func (p *choiceParser) generate(w io.Writer, done map[string]bool) error {
if done[p.name] {
return nil

View File

@ -204,7 +204,6 @@ func addDefinition(s *Syntax, n *Node) error {
func define(s *Syntax, syntaxTree *Node) error {
syntaxTree = dropComments(syntaxTree)
for _, n := range syntaxTree.Nodes {
if err := addDefinition(s, n); err != nil {
return err

638
format.go Normal file
View File

@ -0,0 +1,638 @@
package treerack
import (
"bytes"
"fmt"
"io"
"strings"
"unicode"
)
// initialTargetWidth is the target width handed to the formatter for every
// top-level definition. The space taken by the padded definition name and the
// " = " separator is subtracted from it before the expression is formatted.
const initialTargetWidth = 112
// commentFormat classifies a top-level AST node by the role it plays in the
// layout of the formatted document.
type commentFormat int

const (
	// commentFormatNone marks a node that is not a comment.
	commentFormatNone commentFormat = iota

	// standaloneComment is a comment that is not attached to a definition:
	// it is the last node, it is followed by another comment, or it is
	// separated from the next node by at least two newlines.
	standaloneComment

	// headerComment is a comment directly preceding a definition.
	headerComment

	// suffixComment is a comment that starts on the same line where the
	// preceding non-comment node ends.
	suffixComment

	// inlineComment is a comment inside a definition. It is not assigned by
	// the top-level classification.
	inlineComment
)
// formatItem pairs a top-level AST node with its comment classification.
type formatItem struct {
	// commentFormat is commentFormatNone for non-comment nodes.
	commentFormat commentFormat
	// node is the top-level AST node the item was created from.
	node *Node
}
// formatGroup is a run of top-level items that are formatted together: the
// definition names of a group are padded to a common width.
type formatGroup struct {
	items []formatItem
}
// topLevelCommentFormat classifies the i-th top-level node n of ast.
//
// Non-comment nodes yield commentFormatNone. A comment is a suffix comment
// when the previous node is not a comment and no newline separates the two.
// Otherwise it is standalone when it is the last node, when the next node is
// a comment as well, or when at least two newlines separate it from the next
// node. In every remaining case it is the header comment of the next node.
func topLevelCommentFormat(ast *Node, i int, n *Node) commentFormat {
	if n.Name != "comment" {
		return commentFormatNone
	}

	if i > 0 {
		prev := ast.Nodes[i-1]
		if prev.Name != "comment" {
			gap := string(ast.tokens[prev.To:n.From])
			if !strings.Contains(gap, "\n") {
				return suffixComment
			}
		}
	}

	isLast := i+1 == len(ast.Nodes)
	if isLast || ast.Nodes[i+1].Name == "comment" {
		return standaloneComment
	}

	gap := string(ast.tokens[n.To:ast.Nodes[i+1].From])
	if strings.Count(gap, "\n") >= 2 {
		return standaloneComment
	}

	return headerComment
}
// groupASTByComments splits the top-level nodes of ast into format groups.
//
// Grouping rules:
//   - a non-comment node joins the current group, unless the current group
//     ends with a standalone comment, in which case it starts a new group;
//   - a suffix comment always joins the current group;
//   - a standalone or header comment closes the current group and becomes
//     the first item of a new one.
//
// Only non-empty groups are returned, so an AST without top-level nodes
// results in no groups rather than a single empty one.
func groupASTByComments(ast *Node) []formatGroup {
	var (
		groups  []formatGroup
		current formatGroup
	)

	for i, n := range ast.Nodes {
		last := len(current.items) - 1
		cf := topLevelCommentFormat(ast, i, n)
		item := formatItem{commentFormat: cf, node: n}

		switch cf {
		case commentFormatNone:
			if last >= 0 && current.items[last].commentFormat == standaloneComment {
				groups = append(groups, current)
				current.items = nil
			}

			current.items = append(current.items, item)
		case suffixComment:
			current.items = append(current.items, item)
		default:
			if last >= 0 {
				groups = append(groups, current)
			}

			// A fresh slice keeps the already stored group untouched.
			current.items = []formatItem{item}
		}
	}

	if len(current.items) > 0 {
		groups = append(groups, current)
	}

	return groups
}
// trimComment normalizes the whitespace of a comment text that may consist of
// several line and block comments.
//
//   - "//" is followed by a space unless the source already has one;
//   - "*/" is followed by a space unless whitespace follows in the source;
//   - the content of comments is copied verbatim;
//   - between comments, newlines are kept, while any other rune is only kept
//     when the output so far does not end in whitespace;
//   - trailing whitespace is removed from every line of the result.
func trimComment(text string) string {
	const (
		outside = iota
		inBlock
		inLine
	)

	src := []rune(text)
	dst := make([]rune, 0, len(src))

	// startsPair reports whether src[i:] begins with the runes a, b.
	startsPair := func(i int, a, b rune) bool {
		return i+1 < len(src) && src[i] == a && src[i+1] == b
	}

	state := outside
	for i := 0; i < len(src); i++ {
		c := src[i]
		switch state {
		case inBlock:
			if !startsPair(i, '*', '/') {
				dst = append(dst, c)
				continue
			}

			dst = append(dst, '*', '/')
			state = outside
			if i+2 < len(src) && !unicode.IsSpace(src[i+2]) {
				dst = append(dst, ' ')
			}

			i++ // skip the '/' of the terminator
		case inLine:
			dst = append(dst, c)
			if c == '\n' {
				state = outside
			}
		default:
			switch {
			case startsPair(i, '/', '*'):
				dst = append(dst, '/', '*')
				state = inBlock
				i++ // skip the '*' of the opener
			case startsPair(i, '/', '/'):
				dst = append(dst, '/', '/')
				state = inLine
				if i+2 < len(src) && src[i+2] != ' ' {
					dst = append(dst, ' ')
				}

				i++ // skip the second '/'
			case c == '\n', len(dst) > 0 && !unicode.IsSpace(dst[len(dst)-1]):
				dst = append(dst, c)
			}
		}
	}

	trimmed := strings.Split(string(dst), "\n")
	for i, line := range trimmed {
		trimmed[i] = strings.TrimRightFunc(line, unicode.IsSpace)
	}

	return strings.Join(trimmed, "\n")
}
// formatComment writes the text of the comment node n to out after
// normalizing its whitespace with trimComment.
func formatComment(out io.Writer, n *Node) error {
	if _, err := fmt.Fprint(out, trimComment(n.Text())); err != nil {
		return err
	}

	return nil
}
// formatDefinitionName returns the left-hand side of a definition: the text
// of the first child node followed by the names of the flag nodes, each
// prefixed with a colon. The last child, the expression, is not part of it.
func formatDefinitionName(item formatItem) string {
	children := item.node.Nodes

	var b strings.Builder
	b.WriteString(children[0].Text())
	for _, flag := range children[1 : len(children)-1] {
		b.WriteByte(':')
		b.WriteString(flag.Name)
	}

	return b.String()
}
// formatItemNames returns the formatted definition name of every item in g,
// index-aligned with g.items, and the byte length of the longest one. Comment
// items get an empty name and do not contribute to the width.
func formatItemNames(g formatGroup) ([]string, int) {
	names := make([]string, len(g.items))
	widest := 0
	for i := range g.items {
		if g.items[i].commentFormat != commentFormatNone {
			continue
		}

		names[i] = formatDefinitionName(g.items[i])
		if w := len(names[i]); w > widest {
			widest = w
		}
	}

	return names, widest
}
// formatAnyChar writes the any-char terminal, a single dot, to out.
func formatAnyChar(out io.Writer) error {
	if _, err := fmt.Fprint(out, "."); err != nil {
		return err
	}

	return nil
}
// formatCharClass writes the source text of the char-class node n unchanged.
func formatCharClass(out io.Writer, n *Node) error {
	if _, err := fmt.Fprint(out, n.Text()); err != nil {
		return err
	}

	return nil
}
// formatCharSequence writes the source text of the char-sequence node n
// unchanged.
func formatCharSequence(out io.Writer, n *Node) error {
	if _, err := fmt.Fprint(out, n.Text()); err != nil {
		return err
	}

	return nil
}
// formatSymbol writes the source text of the symbol node n unchanged.
func formatSymbol(out io.Writer, n *Node) error {
	if _, err := fmt.Fprint(out, n.Text()); err != nil {
		return err
	}

	return nil
}
// decTargetWidth reduces the target width w by the given amount without
// letting it drop below zero. A width that is already zero or negative is
// returned unchanged.
func decTargetWidth(w, by int) int {
	switch {
	case w <= 0:
		return w
	case w-by < 0:
		return 0
	default:
		return w - by
	}
}
// formatSequenceItemNode writes a single sequence item to out: the item
// expression, n.Nodes[0], followed by its quantifier, if any.
//
// When n has exactly two children, the second one is the quantity and is read
// with getQuantity; the range is then normalized with normalizeItemRange.
//
// An expression that is a choice or a sequence with more than one child is
// wrapped in parentheses. Such an expression is formatted with the target
// width reduced by two. When the result spans multiple lines, the parentheses
// are laid out around the continuation lines.
//
// The quantifier is written as "?", "*" or "+" for the ranges 0..1, 0..n and
// 1..n, as "{n}" for an exact count, and as "{min,max}" otherwise, where a
// zero min and an unbounded max are left out. The range 1..1 has no
// quantifier.
//
// It returns the first error reported by getQuantity, by formatting the
// expression, or by writing to out.
func formatSequenceItemNode(out io.Writer, targetWidth int, n *Node) error {
	var (
		lo, hi int
		err    error
	)

	// fprint writes to out until the first failure; after that it is a
	// no-op and err keeps the first write error.
	fprint := func(a ...any) {
		if err != nil {
			return
		}

		_, err = fmt.Fprint(out, a...)
	}

	if len(n.Nodes) == 2 {
		if lo, hi, err = getQuantity(n.Nodes[1]); err != nil {
			return err
		}
	}

	lo, hi = normalizeItemRange(lo, hi)

	expr := n.Nodes[0]
	needsGrouping := (expr.Name == "choice" || expr.Name == "sequence") && len(expr.Nodes) > 1
	if needsGrouping {
		var buf bytes.Buffer
		targetWidth = decTargetWidth(targetWidth, 2)
		if err := formatExpression(&buf, targetWidth, expr); err != nil {
			return err
		}

		body := buf.String()
		if strings.Contains(body, "\n") {
			lines := strings.Split(body, "\n")
			fprint("( ")
			fprint(lines[0])
			for _, l := range lines[1:] {
				fprint("\n ")
				fprint(l)
			}

			fprint("\n )")
		} else {
			fprint("(")
			fprint(body)
			fprint(")")
		}
	} else if err := formatExpression(out, targetWidth, expr); err != nil {
		return err
	}

	switch {
	case lo == 1 && hi == 1:
		// exactly once: no quantifier
	case lo == 0 && hi == 1:
		fprint("?")
	case lo == 0 && hi < 0:
		fprint("*")
	case lo == 1 && hi < 0:
		fprint("+")
	case lo == hi:
		fprint("{")
		fprint(lo)
		fprint("}")
	default:
		fprint("{")
		if lo > 0 {
			fprint(lo)
		}

		fprint(",")
		if hi >= 0 {
			fprint(hi)
		}

		fprint("}")
	}

	return err
}
// formatSequenceItemNodes writes the items of a sequence to out. With a
// negative targetWidth the items are separated by a single space, otherwise
// every item after the first one starts on a new line. Comment nodes are
// written with formatComment, all other nodes with formatSequenceItemNode.
func formatSequenceItemNodes(out io.Writer, targetWidth int, n []*Node) error {
	separator := "\n "
	if targetWidth < 0 {
		separator = " "
	}

	for idx := range n {
		if idx != 0 {
			if _, err := fmt.Fprint(out, separator); err != nil {
				return err
			}
		}

		var err error
		if n[idx].Name == "comment" {
			err = formatComment(out, n[idx])
		} else {
			err = formatSequenceItemNode(out, targetWidth, n[idx])
		}

		if err != nil {
			return err
		}
	}

	return nil
}
// formatSequence writes the sequence items n to out. The single-line form is
// used when targetWidth is negative or when that form is not longer than
// targetWidth; otherwise the items are rendered in the multi-line form.
func formatSequence(out io.Writer, targetWidth int, n []*Node) error {
	var oneLine bytes.Buffer
	if err := formatSequenceItemNodes(&oneLine, -1, n); err != nil {
		return err
	}

	if targetWidth < 0 || oneLine.Len() <= targetWidth {
		_, err := io.Copy(out, &oneLine)
		return err
	}

	var multiLine bytes.Buffer
	if err := formatSequenceItemNodes(&multiLine, targetWidth, n); err != nil {
		return err
	}

	_, err := io.Copy(out, &multiLine)
	return err
}
// formatChoiceOptionNodes writes the options of a choice to out.
//
// With a negative targetWidth everything stays on one line and the options
// are separated by " | "; otherwise every option after the first one starts
// on a new line with "| ". Comment nodes are written with formatComment and
// are separated from the preceding node by a space or a newline, without the
// option separator.
//
// The option separator is only written between two options. An option that
// follows nothing but comments is separated from them like a comment, since
// a "|" before the first option would not be valid syntax.
func formatChoiceOptionNodes(out io.Writer, targetWidth int, n []*Node) error {
	sep, commentSep := " | ", " "
	if targetWidth >= 0 {
		sep, commentSep = "\n| ", "\n"
	}

	var hasOption bool
	for i, ni := range n {
		isComment := ni.Name == "comment"
		if i > 0 {
			s := sep
			if isComment || !hasOption {
				s = commentSep
			}

			if _, err := fmt.Fprint(out, s); err != nil {
				return err
			}
		}

		if isComment {
			if err := formatComment(out, ni); err != nil {
				return err
			}

			continue
		}

		hasOption = true
		if err := formatExpression(out, targetWidth, ni); err != nil {
			return err
		}
	}

	return nil
}
// formatChoice writes the choice options n to out. The single-line form is
// used when targetWidth is negative or when that form is not longer than
// targetWidth; otherwise the options are rendered in the multi-line form.
func formatChoice(out io.Writer, targetWidth int, n []*Node) error {
	var oneLine bytes.Buffer
	if err := formatChoiceOptionNodes(&oneLine, -1, n); err != nil {
		return err
	}

	if targetWidth < 0 || oneLine.Len() <= targetWidth {
		_, err := io.Copy(out, &oneLine)
		return err
	}

	var multiLine bytes.Buffer
	if err := formatChoiceOptionNodes(&multiLine, targetWidth, n); err != nil {
		return err
	}

	_, err := io.Copy(out, &multiLine)
	return err
}
// formatExpression dispatches on the name of the expression node n and writes
// its formatted form to out. Nodes with a name not listed here are ignored.
func formatExpression(out io.Writer, targetWidth int, n *Node) error {
	switch n.Name {
	case "comment":
		return formatComment(out, n)
	case "any-char":
		return formatAnyChar(out)
	case "char-class":
		return formatCharClass(out, n)
	case "char-sequence":
		return formatCharSequence(out, n)
	case "symbol":
		return formatSymbol(out, n)
	case "sequence":
		return formatSequence(out, targetWidth, n.Nodes)
	case "choice":
		return formatChoice(out, targetWidth, n.Nodes)
	default:
		return nil
	}
}
// formatDefinition writes a single definition to out: the name padded to
// namesWidth, " = ", the formatted expression and a closing ";".
//
// The expression is the last child of n. It is formatted with the target
// width reduced by the space taken by the padded name and the " = "
// separator. Continuation lines of a multi-line expression are prefixed with
// a space and pad.
func formatDefinition(out io.Writer, targetWidth, namesWidth int, pad, name string, n *Node) error {
	head := name + pad[:namesWidth-len(name)] + " = "
	if _, err := fmt.Fprint(out, head); err != nil {
		return err
	}

	var body bytes.Buffer
	exprWidth := decTargetWidth(targetWidth, namesWidth+3)
	if err := formatExpression(&body, exprWidth, n.Nodes[len(n.Nodes)-1]); err != nil {
		return err
	}

	lines := strings.Split(body.String(), "\n")
	pieces := make([]string, 0, 3*len(lines)+1)
	pieces = append(pieces, lines[0])
	for _, l := range lines[1:] {
		pieces = append(pieces, "\n ", pad, l)
	}

	pieces = append(pieces, ";")

	// One write per piece; the first failure stops the output.
	for _, p := range pieces {
		if _, err := fmt.Fprint(out, p); err != nil {
			return err
		}
	}

	return nil
}
// formatASTGroup writes a single format group to out.
//
// An empty group produces no output. A group starting with a standalone
// comment consists of that comment only. Otherwise an optional header comment
// is written first, followed by the definitions of the group, each on its own
// line and with the names padded to the width of the longest name in the
// group. Suffix comments are appended to the current line after a space.
//
// It returns the first error reported while writing to out.
func formatASTGroup(out io.Writer, g formatGroup) error {
	if len(g.items) == 0 {
		return nil
	}

	if g.items[0].commentFormat == standaloneComment {
		return formatComment(out, g.items[0].node)
	}

	hasHeaderComment := g.items[0].commentFormat == headerComment
	if hasHeaderComment {
		if err := formatComment(out, g.items[0].node); err != nil {
			return err
		}

		g.items = g.items[1:]
	}

	names, namesWidth := formatItemNames(g)
	pad := strings.Repeat(" ", namesWidth)
	for i, item := range g.items {
		if item.commentFormat == suffixComment {
			if _, err := fmt.Fprint(out, " "); err != nil {
				return err
			}

			if err := formatComment(out, item.node); err != nil {
				return err
			}

			continue
		}

		// Every definition starts on a new line, except the very first
		// output of the group.
		if hasHeaderComment || i > 0 {
			if _, err := fmt.Fprintln(out); err != nil {
				return err
			}
		}

		if err := formatDefinition(
			out,
			initialTargetWidth,
			namesWidth,
			pad,
			names[i],
			item.node,
		); err != nil {
			return err
		}
	}

	return nil
}
// formatAST writes the formatted form of the syntax document ast to out. The
// top-level nodes are grouped by groupASTByComments, and the groups are
// separated by an empty line.
//
// Design notes on comment handling:
//
// - drop whitespace comments
// - use line comments by default
// - comment types: standalone, header, suffix and inline comment
//
// standalone comment:
// - preceded by a definition or at least two empty lines, and followed by at
// least two empty lines
// - separate it by two empty lines above and below
//
// header comment:
// - separated from the subsequent definition by zero or one empty lines
// - separate it by two empty lines above and one empty line below
//
// suffix comment:
// - starts on the same line as the definition it belongs to
// - append to the definition
// - if it consists of multiple lines, append a new line below
//
// inline comment:
// - it's inside a definition
// - if it's before the eq sign, discard name padding and use block comment
// - if it's in an expression, and falls on its own line, and fits on the
// previous line, put it there
// - if it's in an expression, and falls on its own line, use a line comment
// - if it's in an expression, and it's followed by non-comment on the same
// line, use block comment
// - if it consists of multiple lines, append a new line below the definition
func formatAST(out io.Writer, ast *Node) error {
	groups := groupASTByComments(ast)
	for i := range groups {
		if i != 0 {
			if _, err := fmt.Fprint(out, "\n\n"); err != nil {
				return err
			}
		}

		if err := formatASTGroup(out, groups[i]); err != nil {
			return err
		}
	}

	return nil
}
// formatDefinitions writes the user defined definitions of the registry of s
// to out, one definition per statement, in the iteration order of the
// registry's definitions.
//
// A definition is printed as its name, followed by its colon separated flags
// when it has any, padded to the width of the longest name, then " = ", the
// pretty formatted expression and ";". The NoWhitespace flag is not printed
// for char sequences. Continuation lines of multi-line expressions are
// indented below the expression.
//
// Name widths are measured in runes both for finding the longest name and
// for the padding, so that names with multi-byte characters are aligned and
// cannot produce a negative padding length.
//
// It returns the first error reported while writing to out.
func formatDefinitions(out io.Writer, s *Syntax) error {
	type entry struct {
		name  string
		width int
		def   definition
	}

	var (
		entries    []entry
		namesWidth int
	)

	for _, def := range s.registry.definitions {
		ct := def.commitType()
		if ct&userDefined == 0 {
			continue
		}

		ct &^= userDefined
		if sq, ok := def.(*sequenceDefinition); ok && sq.isCharSequence(s.registry) {
			ct &^= NoWhitespace
		}

		name := def.nodeName()
		if ct != None {
			name = fmt.Sprintf("%s:%v", name, ct)
		}

		width := len([]rune(name))
		entries = append(entries, entry{name: name, width: width, def: def})
		if width > namesWidth {
			namesWidth = width
		}
	}

	o := formatOptions{
		mode:        formatPretty,
		targetWidth: decTargetWidth(initialTargetWidth, namesWidth+3),
	}

	pad := strings.Repeat(" ", namesWidth)
	for _, e := range entries {
		lines := strings.Split(e.def.format(s.registry, o), "\n")
		if _, err := fmt.Fprintf(
			out,
			"%s%s = %s",
			e.name,
			pad[:namesWidth-e.width],
			lines[0],
		); err != nil {
			return err
		}

		for _, l := range lines[1:] {
			if _, err := fmt.Fprintf(out, "\n%s %s", pad, l); err != nil {
				return err
			}
		}

		if _, err := fmt.Fprint(out, ";\n"); err != nil {
			return err
		}
	}

	return nil
}

View File

@ -35,7 +35,7 @@ func testDefinitionFormatItem(t *testing.T, treerack *Syntax, f formatFlags, tes
t.Fatal("failed to register definition")
}
output := def.format(s.registry, f)
output := def.format(s.registry, formatOptions{mode: f})
if output != test.output {
t.Error("invalid definition format")
t.Log("got: ", output)
@ -63,23 +63,23 @@ func TestCharFormat(t *testing.T) {
}, {
title: "one char",
definition: "[a]",
output: "[a]",
output: `"a"`,
}, {
title: "escaped char",
definition: "[\\a]",
output: "[a]",
output: `"a"`,
}, {
title: "escaped control char",
definition: "[\\^]",
output: "[\\^]",
output: `"^"`,
}, {
title: "escaped whitespace char",
definition: "[\\n]",
output: "[\\n]",
output: `"\n"`,
}, {
title: "escaped verbatim whitespace char",
definition: "[\n]",
output: "[\\n]",
output: `"\n"`,
}, {
title: "escaped range",
definition: "[\\b-\\v]",
@ -123,7 +123,7 @@ func TestSequenceFormat(t *testing.T) {
}, {
title: "quantifiers, 0-or-more, single char",
syntax: `def = "a"*`,
output: `[a]*`,
output: `"a"*`,
}, {
title: "quantifiers, 0-or-more",
syntax: `def = "abc"*`,
@ -131,7 +131,7 @@ func TestSequenceFormat(t *testing.T) {
}, {
title: "quantifiers, 1-or-more, single char",
syntax: `def = "a"+`,
output: `[a]+`,
output: `"a"+`,
}, {
title: "quantifiers, 1-or-more",
syntax: `def = "abc"+`,
@ -139,7 +139,7 @@ func TestSequenceFormat(t *testing.T) {
}, {
title: "quantifiers, 0-or-one, single char",
syntax: `def = "a"?`,
output: `[a]?`,
output: `"a"?`,
}, {
title: "quantifiers, 0-or-one",
syntax: `def = "abc"?`,
@ -147,7 +147,7 @@ func TestSequenceFormat(t *testing.T) {
}, {
title: "quantifiers, exact number, single char",
syntax: `def = "a"{3}`,
output: `[a]{3}`,
output: `"a"{3}`,
}, {
title: "quantifiers, exact number",
syntax: `def = "abc"{3}`,
@ -155,7 +155,7 @@ func TestSequenceFormat(t *testing.T) {
}, {
title: "quantifiers, max, single char",
syntax: `def = "a"{0, 3}`,
output: `[a]{,3}`,
output: `"a"{,3}`,
}, {
title: "quantifiers, max",
syntax: `def = "abc"{0, 3}`,
@ -163,7 +163,7 @@ func TestSequenceFormat(t *testing.T) {
}, {
title: "quantifiers, min, single char",
syntax: `def = "a"{3,}`,
output: `[a]{3,}`,
output: `"a"{3,}`,
}, {
title: "quantifiers, min",
syntax: `def = "abc"{3,}`,
@ -171,7 +171,7 @@ func TestSequenceFormat(t *testing.T) {
}, {
title: "quantifiers, range, single char",
syntax: `def = "a"{3, 9}`,
output: `[a]{3,9}`,
output: `"a"{3,9}`,
}, {
title: "quantifiers, range",
syntax: `def = "abc"{3, 9}`,
@ -183,7 +183,7 @@ func TestSequenceFormat(t *testing.T) {
}, {
title: "choice in sequence, single char",
syntax: `def = "a" ("b" | "c")`,
output: `[a] ([b] | [c])`,
output: `"a" ("b" | "c")`,
}, {
title: "choice in sequence",
syntax: `def = "abc" ("def" | "ghi")`,
@ -191,7 +191,7 @@ func TestSequenceFormat(t *testing.T) {
}, {
title: "grouped quantifier, single char",
syntax: `def = ("a" "b"){3}`,
output: `([a] [b]){3}`,
output: `("a" "b"){3}`,
}, {
title: "grouped quantifier",
syntax: `def = ("abc" "def"){3}`,
@ -203,7 +203,7 @@ func TestChoiceFormat(t *testing.T) {
testDefinitionFormat(t, formatNone, []formatDefinitionTestItem{{
title: "choice of char sequences, single char",
syntax: `def = "a" | "b" | "c"`,
output: `[a] | [b] | [c]`,
output: `"a" | "b" | "c"`,
}, {
title: "choice of char sequences",
syntax: `def = "abc" | "def" | "ghi"`,
@ -211,7 +211,7 @@ func TestChoiceFormat(t *testing.T) {
}, {
title: "choice of inline sequences, single char",
syntax: `def = "a" "b" | "c" "d" | "e" "f"`,
output: `[a] [b] | [c] [d] | [e] [f]`,
output: `"a" "b" | "c" "d" | "e" "f"`,
}, {
title: "choice of inline sequences",
syntax: `def = "abc" "def" | "ghi" "jkl" | "mno" "pqr"`,
@ -223,8 +223,92 @@ func TestChoiceFormat(t *testing.T) {
}})
}
func TestMultiLine(t *testing.T) {
}
const testDoc = `/*
foo
*/
// bar
// bar
// baz
/* foo
bar baz */ // foo bar baz
wschar:alias = " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v";
wsc:ws = wschar | comment;
block-comment:alias:nows = "/*" ("*" [^/] | [^*])* "*/";
line-comment:alias:nows = "//" [^\n]*;
comment-segment:alias:nows = line-comment | block-comment;
ws-no-nl:alias:nows = " "
| "\t"
| "\b"
/* this one */ /* is a */
| "\f"
/* form feed */ /* for sure */
| "\r"
| "\v";
comment:nows = comment-segment
/* segment is not the best name */ /* but */
(ws-no-nl* "\n"? ws-no-nl* /* fine */ comment-segment)*;
any-char = "."; // equivalent to [^]
func TestLineSplit(t *testing.T) {
// caution: newline is accepted
class-not = "^";
class-char:nows = [^\\\[\]\^\-] | "\\" .;
char-range:nows = class-char "-" class-char;
char-class:nows = "[" class-not? (class-char | char-range)* "]";
// newline is accepted
sequence-char:nows = [^\\"] | "\\" .;
char-sequence:nows = "\"" sequence-char* "\"";
terminal:alias = any-char | char-class | char-sequence;
symbol:nows = [^\\ \n\t\b\f\r\v/.\[\]\"{}\^+*?|():=;]+;
group:alias = "(" expression ")";
number:alias:nows = [0-9]+;
count = number;
count-quantifier = "{" count "}";
range-from = number;
range-to = number;
range-quantifier = "{" range-from? "," range-to? "}";
one-or-more = "+";
zero-or-more = "*";
zero-or-one = "?";
quantity:alias = count-quantifier | range-quantifier | one-or-more | zero-or-more | zero-or-one;
item:nows = (terminal | symbol | group) quantity?;
sequence = item+;
option:alias = terminal | symbol | group | sequence;
// DOC: how the order matters
choice = option ("|" option)+;
// DOC: not having 'not' needs some tricks sometimes
expression:alias = terminal | symbol | group | sequence | choice;
alias = "alias";
ws = "ws";
nows = "nows";
kw = "kw";
nokw = "nokw";
failpass = "failpass";
root = "root";
flag:alias = alias | ws | nows | kw | nokw | failpass | root;
definition-name:alias:nows = symbol (":" flag)*;
definition = definition-name "=" expression;
definitions:alias = definition (";"+ definition)*;
syntax:root = ";"* definitions? ";"*;`
func TestDocFormat(t *testing.T) {
in := bytes.NewBufferString(testDoc)
s := &Syntax{}
if err := s.ReadSyntax(in); err != nil {
t.Fatal(err)
}
out := bytes.NewBuffer(nil)
if err := s.Format(out); err != nil {
t.Fatal(err)
}
if out.String() != testDoc {
t.Log(testDoc)
t.Log(out.String())
t.Fatal()
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -771,6 +771,10 @@ const (
formatIncludeComments
)
// formatOptions controls how a definition is rendered by its format method.
type formatOptions struct {
	// mode holds the format flags, e.g. formatPretty.
	mode formatFlags
	// targetWidth is the width the single-line form of an expression is
	// compared to in pretty mode, before falling back to the multi-line form.
	targetWidth int
}
type ParseError struct {
Input string
Offset int

View File

@ -4,6 +4,7 @@ import (
"fmt"
"io"
"strconv"
"strings"
)
type sequenceDefinition struct {
@ -43,26 +44,26 @@ func (d *sequenceDefinition) setID(id int) { d.id = id }
func (d *sequenceDefinition) commitType() CommitType { return d.commit }
func (d *sequenceDefinition) setCommitType(ct CommitType) { d.commit = ct }
func normalizeItemRange(item SequenceItem) SequenceItem {
if item.Min == 0 && item.Max == 0 {
item.Min, item.Max = 1, 1
return item
func normalizeItemRange(min, max int) (int, int) {
if min == 0 && max == 0 {
min, max = 1, 1
return min, max
}
if item.Min <= 0 {
item.Min = 0
if min <= 0 {
min = 0
}
if item.Max <= 0 {
item.Max = -1
if max <= 0 {
max = -1
}
return item
return min, max
}
func (d *sequenceDefinition) initRanges() {
for i, item := range d.items {
item = normalizeItemRange(item)
item.Min, item.Max = normalizeItemRange(item.Min, item.Max)
d.items[i] = item
d.ranges = append(d.ranges, []int{item.Min, item.Max})
}
@ -184,8 +185,8 @@ func (d *sequenceDefinition) builder() builder {
func (d *sequenceDefinition) isCharSequence(r *registry) bool {
for i := range d.originalItems {
item := normalizeItemRange(d.originalItems[i])
if item.Min != 1 || item.Max != 1 {
min, max := normalizeItemRange(d.originalItems[i].Min, d.originalItems[i].Max)
if min != 1 || max != 1 {
return false
}
@ -199,98 +200,140 @@ func (d *sequenceDefinition) isCharSequence(r *registry) bool {
return true
}
func (d *sequenceDefinition) format(r *registry, f formatFlags) string {
if d.isCharSequence(r) {
if len(d.originalItems) == 1 {
itemDef := r.definition[d.originalItems[0].Name]
c, _ := itemDef.(*charParser)
return c.format(r, f)
}
var chars []rune
for i := range d.originalItems {
itemDef := r.definition[d.originalItems[i].Name]
c, _ := itemDef.(*charParser)
chars = append(chars, c.chars[0])
}
chars = escape(charClassEscape, []rune(charClassBanned), chars)
return string(append([]rune{'"'}, append(chars, '"')...))
// formatCharSequence renders a sequence that consists of single char parsers
// only. A sequence with a single item is rendered by the char parser itself.
// Otherwise the first char of every item is collected, the chars listed in
// charSeqBanned are escaped, and the result is wrapped in double quotes.
func (d *sequenceDefinition) formatCharSequence(r *registry, o formatOptions) string {
	// charAt returns the char parser of the i-th original item.
	charAt := func(i int) *charParser {
		c, _ := r.definition[d.originalItems[i].Name].(*charParser)
		return c
	}

	if len(d.originalItems) == 1 {
		return charAt(0).format(r, o)
	}

	var raw []rune
	for i := range d.originalItems {
		raw = append(raw, charAt(i).chars[0])
	}

	escaped := escape(charClassEscape, []rune(charSeqBanned), raw)
	return `"` + string(escaped) + `"`
}
// formatSequenceItem renders a single sequence item without its quantifier.
// When the item needs grouping, it is formatted with the target width reduced
// by two and wrapped in parentheses. For a multi-line item the opening
// parenthesis is put in front of the first line, the continuation lines are
// indented by two spaces, and the closing parenthesis gets its own line.
func formatSequenceItem(r *registry, item definition, needsGrouping bool, o formatOptions) []rune {
	itemOptions := o
	if needsGrouping {
		itemOptions.targetWidth -= 2
	}

	text := item.format(r, itemOptions)
	switch {
	case !needsGrouping:
		return []rune(text)
	case !strings.Contains(text, "\n"):
		return []rune("(" + text + ")")
	default:
		lines := strings.Split(text, "\n")
		return []rune("( " + strings.Join(lines, "\n  ") + "\n  )")
	}
}
func (d *sequenceDefinition) formatSequence(r *registry, o formatOptions) []rune {
var chars []rune
sep := []rune{' '}
if o.mode&formatPretty > 0 {
sep = []rune{'\n', ' ', ' '}
}
for i := range d.originalItems {
if len(chars) > 0 {
chars = append(chars, ' ')
chars = append(chars, sep...)
}
item := normalizeItemRange(d.originalItems[i])
needsQuantifier := item.Min != 1 || item.Max != 1
itemDef := r.definition[item.Name]
min, max := normalizeItemRange(d.originalItems[i].Min, d.originalItems[i].Max)
needsQuantifier := min != 1 || max != 1
itemDef := r.definition[d.originalItems[i].Name]
isSymbol := itemDef.commitType()&userDefined != 0
ch, isChoice := itemDef.(*choiceDefinition)
isChoiceOfMultiple := isChoice && len(ch.options) > 1
seq, isSequence := itemDef.(*sequenceDefinition)
isSequenceOfMultiple := isSequence && len(seq.originalItems) > 1 && !seq.isCharSequence(r)
needsGrouping := isChoiceOfMultiple || isSequenceOfMultiple
if isSymbol {
chars = append(chars, []rune(itemDef.nodeName())...)
} else {
if needsGrouping {
chars = append(chars, '(')
}
chars = append(chars, []rune(itemDef.format(r, f))...)
if needsGrouping {
chars = append(chars, ')')
}
chars = append(chars, formatSequenceItem(r, itemDef, needsGrouping, o)...)
}
if !needsQuantifier {
continue
}
if item.Min == 0 && item.Max == 1 {
if min == 0 && max == 1 {
chars = append(chars, '?')
continue
}
if item.Min == 0 && item.Max < 0 {
if min == 0 && max < 0 {
chars = append(chars, '*')
continue
}
if item.Min == 1 && item.Max < 0 {
if min == 1 && max < 0 {
chars = append(chars, '+')
continue
}
chars = append(chars, '{')
if item.Min == item.Max {
chars = append(chars, []rune(strconv.Itoa(item.Min))...)
if min == max {
chars = append(chars, []rune(strconv.Itoa(min))...)
} else {
if item.Min > 0 {
chars = append(chars, []rune(strconv.Itoa(item.Min))...)
if min > 0 {
chars = append(chars, []rune(strconv.Itoa(min))...)
}
chars = append(chars, ',')
if item.Max >= 0 {
chars = append(chars, []rune(strconv.Itoa(item.Max))...)
if max >= 0 {
chars = append(chars, []rune(strconv.Itoa(max))...)
}
}
chars = append(chars, '}')
}
return string(chars)
return chars
}
// format renders the sequence as syntax source text. Char sequences are
// rendered by formatCharSequence. Other sequences are first rendered on a
// single line with the formatPretty flag cleared; that form is returned when
// pretty mode is off or when its length in runes does not exceed
// o.targetWidth. Otherwise the sequence is rendered again with the original
// options.
func (d *sequenceDefinition) format(r *registry, o formatOptions) string {
	if d.isCharSequence(r) {
		return d.formatCharSequence(r, o)
	}

	compact := o
	compact.mode &^= formatPretty
	oneLine := d.formatSequence(r, compact)

	pretty := o.mode&formatPretty != 0
	if !pretty || len(oneLine) <= o.targetWidth {
		return string(oneLine)
	}

	return string(d.formatSequence(r, o))
}
func (p *sequenceParser) generate(w io.Writer, done map[string]bool) error {

View File

@ -20,6 +20,7 @@ import (
"errors"
"fmt"
"io"
"strings"
)
// SequenceItem represents a single element within a sequence definition, referencing another parser by name.
@ -54,6 +55,7 @@ type SequenceItem struct {
//
// 3. Execution: Use Parse() to process input or Generate() to create Go source code.
type Syntax struct {
ast *Node
registry *registry
initialized bool
errInitFailed error
@ -91,7 +93,7 @@ type definition interface {
addGeneralization(int)
parser() parser
builder() builder
format(*registry, formatFlags) string
format(*registry, formatOptions) string
}
var (
@ -110,26 +112,44 @@ var (
)
func (ct CommitType) String() string {
switch ct {
case None:
if ct == None || ct == userDefined {
return "none"
case Alias:
return "alias"
case Whitespace:
return "whitespace"
case NoWhitespace:
return "no-whitespace"
case Keyword:
return "keyword"
case NoKeyword:
return "no-keyword"
case FailPass:
return "fail-pass"
case Root:
return "root"
default:
}
if ct&^(Alias|Whitespace|NoWhitespace|Keyword|NoKeyword|FailPass|Root|userDefined) != 0 {
return "unknown"
}
var cts []string
if ct&Alias > 0 {
cts = append(cts, "alias")
}
if ct&Whitespace > 0 {
cts = append(cts, "ws")
}
if ct&NoWhitespace > 0 {
cts = append(cts, "nows")
}
if ct&Keyword > 0 {
cts = append(cts, "kw")
}
if ct&NoKeyword > 0 {
cts = append(cts, "nokw")
}
if ct&FailPass > 0 {
cts = append(cts, "failpass")
}
if ct&Root > 0 {
cts = append(cts, "root")
}
return strings.Join(cts, ":")
}
func duplicateDefinition(name string) error {
@ -143,6 +163,10 @@ func parserNotFound(name string) error {
var symbolChars = []rune("\\ \n\t\b\f\r\v/.[]\"{}^+*?|():=;")
func isValidSymbol(n string) bool {
if len(n) == 0 {
return false
}
runes := []rune(n)
for _, r := range runes {
if !matchChar(symbolChars, nil, true, r) {
@ -357,6 +381,7 @@ func (s *Syntax) ReadSyntax(r io.Reader) error {
}
n := mapSelfNode(sn)
s.ast = n
return define(s, n)
}
@ -523,3 +548,12 @@ func (s *Syntax) Parse(r io.Reader) (*Node, error) {
return parseInput(r, s.root.parser(), s.root.builder(), s.keywordParsers())
}
// Format writes the loaded syntax definition to out in a formatted way. When
// the syntax tree of the source document is available, the output is
// produced from that tree; otherwise it is produced from the registered
// definitions.
func (s *Syntax) Format(out io.Writer) error {
	if s.ast != nil {
		return formatAST(out, s.ast)
	}

	return formatDefinitions(out, s)
}

View File

@ -1,11 +1,19 @@
/*
foo
*/
// bar
// bar
//baz
/* foo
bar baz */// foo bar baz
wschar:alias = " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v";
wsc:ws = wschar | comment;
block-comment:alias:nows = "/*" ("*" [^/] | [^*])* "*/";
line-comment:alias:nows = "//" [^\n]*;
comment-segment:alias:nows = line-comment | block-comment;
ws-no-nl:alias:nows = " " | "\t" | "\b" | "\f" | "\r" | "\v";
comment:nows = comment-segment (ws-no-nl* "\n"? ws-no-nl* comment-segment)*;
ws-no-nl:alias:nows = " " | "\t" | "\b" | /* this one */ /* is a */ "\f" /* form feed */ /* for sure */ | "\r" | "\v";
comment:nows = comment-segment /* segment is not the best name */ /* but */ (ws-no-nl* "\n"? ws-no-nl* /* fine */ comment-segment)*;
any-char = "."; // equivalent to [^]

View File

@ -53,6 +53,11 @@ const (
formatIncludeComments
)
// formatOptions controls how a definition is rendered by its format method.
type formatOptions struct {
	// mode holds the format flags, e.g. formatPretty.
	mode formatFlags
	// targetWidth is the width the single-line form of an expression is
	// compared to in pretty mode, before falling back to the multi-line form.
	targetWidth int
}
// ParseError reports a failure to match the input text against the defined syntax.
type ParseError struct {