1
0

fix comment handling in formatting

This commit is contained in:
Arpad Ryszka 2026-06-01 20:28:39 +02:00
parent d9f1c70d99
commit b8036802f6
3 changed files with 270 additions and 64 deletions

137
format.go
View File

@ -104,13 +104,24 @@ func groupASTByComments(ast *Node) []formatGroup {
return groups return groups
} }
func trimComment(text string) string { func trimComment(text string, withBlockBody bool) string {
var inBlockComment, inLineComment bool var inBlockComment, inLineComment, newBlockLine bool
tr := []rune(text) tr := []rune(text)
rr := make([]rune, 0, len(tr)) rr := make([]rune, 0, len(tr))
for i := 0; i < len(tr); i++ { for i := 0; i < len(tr); i++ {
r := tr[i] r := tr[i]
if inBlockComment { if inBlockComment {
if withBlockBody && r == '\n' {
rr = append(rr, '\n')
newBlockLine = true
continue
}
if withBlockBody && newBlockLine && unicode.IsSpace(r) {
continue
}
newBlockLine = false
if r != '*' || len(tr) <= i+1 || tr[i+1] != '/' { if r != '*' || len(tr) <= i+1 || tr[i+1] != '/' {
rr = append(rr, r) rr = append(rr, r)
continue continue
@ -163,9 +174,9 @@ func trimComment(text string) string {
return strings.Join(lines, "\n") return strings.Join(lines, "\n")
} }
func formatComment(out io.Writer, n *Node) error { func formatComment(out io.Writer, n *Node, withBlockBody bool) error {
text := n.Text() text := n.Text()
text = trimComment(text) text = trimComment(text, withBlockBody)
_, err := fmt.Fprint(out, text) _, err := fmt.Fprint(out, text)
return err return err
} }
@ -174,6 +185,10 @@ func formatDefinitionName(item formatItem) string {
name := item.node.Nodes[0].Text() name := item.node.Nodes[0].Text()
flags := make([]string, 0, len(item.node.Nodes)-2) flags := make([]string, 0, len(item.node.Nodes)-2)
for i := 1; i < len(item.node.Nodes)-1; i++ { for i := 1; i < len(item.node.Nodes)-1; i++ {
if item.node.Nodes[i].Name == "comment" {
continue
}
flags = append(flags, item.node.Nodes[i].Name) flags = append(flags, item.node.Nodes[i].Name)
} }
@ -343,7 +358,7 @@ func formatSequenceItemNodes(out io.Writer, targetWidth int, n []*Node) error {
} }
if ni.Name == "comment" { if ni.Name == "comment" {
if err := formatComment(out, ni); err != nil { if err := formatComment(out, ni, true); err != nil {
return err return err
} }
@ -359,6 +374,18 @@ func formatSequenceItemNodes(out io.Writer, targetWidth int, n []*Node) error {
} }
func formatSequence(out io.Writer, targetWidth int, n []*Node) error { func formatSequence(out io.Writer, targetWidth int, n []*Node) error {
var hasComments bool
for _, ni := range n {
if ni.Name == "comment" {
hasComments = true
break
}
}
if hasComments {
return formatSequenceItemNodes(out, 0, n)
}
var buf bytes.Buffer var buf bytes.Buffer
if err := formatSequenceItemNodes(&buf, -1, n); err != nil { if err := formatSequenceItemNodes(&buf, -1, n); err != nil {
return err return err
@ -389,7 +416,7 @@ func formatChoiceOptionNodes(out io.Writer, targetWidth int, n []*Node) error {
} }
} }
if err := formatComment(out, ni); err != nil { if err := formatComment(out, ni, true); err != nil {
return err return err
} }
@ -411,6 +438,18 @@ func formatChoiceOptionNodes(out io.Writer, targetWidth int, n []*Node) error {
} }
func formatChoice(out io.Writer, targetWidth int, n []*Node) error { func formatChoice(out io.Writer, targetWidth int, n []*Node) error {
var hasComments bool
for _, ni := range n {
if ni.Name == "comment" {
hasComments = true
break
}
}
if hasComments {
return formatChoiceOptionNodes(out, 0, n)
}
var buf bytes.Buffer var buf bytes.Buffer
if err := formatChoiceOptionNodes(&buf, -1, n); err != nil { if err := formatChoiceOptionNodes(&buf, -1, n); err != nil {
return err return err
@ -431,7 +470,7 @@ func formatExpression(out io.Writer, targetWidth int, n *Node) error {
var err error var err error
switch n.Name { switch n.Name {
case "comment": case "comment":
err = formatComment(out, n) err = formatComment(out, n, true)
case "any-char": case "any-char":
err = formatAnyChar(out) err = formatAnyChar(out)
case "char-class": case "char-class":
@ -450,17 +489,12 @@ func formatExpression(out io.Writer, targetWidth int, n *Node) error {
} }
func formatDefinition(out io.Writer, targetWidth, namesWidth int, pad, name string, n *Node) error { func formatDefinition(out io.Writer, targetWidth, namesWidth int, pad, name string, n *Node) error {
if _, err := fmt.Fprintf(out, "%s%s = ", name, pad[:namesWidth-len(name)]); err != nil { var (
return err buf bytes.Buffer
} foundComment bool
err error
)
var buf bytes.Buffer
targetWidth = decTargetWidth(targetWidth, namesWidth+3)
if err := formatExpression(&buf, targetWidth, n.Nodes[len(n.Nodes)-1]); err != nil {
return err
}
var err error
fprint := func(a ...any) { fprint := func(a ...any) {
if err != nil { if err != nil {
return return
@ -469,6 +503,43 @@ func formatDefinition(out io.Writer, targetWidth, namesWidth int, pad, name stri
_, err = fmt.Fprint(out, a...) _, err = fmt.Fprint(out, a...)
} }
fprintBuf := func(a ...any) {
if err != nil {
return
}
_, err = fmt.Fprint(&buf, a...)
}
fprint(name, pad[:namesWidth-len(name)], " = ")
for _, n := range n.Nodes {
if n.Name != "comment" {
continue
}
if foundComment {
fprintBuf("\n")
}
foundComment = true
if err := formatComment(&buf, n, true); err != nil {
return err
}
}
if foundComment {
lines := strings.Split(buf.String(), "\n")
(&buf).Reset()
for _, l := range lines {
fprint(l, "\n", pad, " ")
}
}
targetWidth = decTargetWidth(targetWidth, namesWidth+3)
if err := formatExpression(&buf, targetWidth, n.Nodes[len(n.Nodes)-1]); err != nil {
return err
}
lines := strings.Split(buf.String(), "\n") lines := strings.Split(buf.String(), "\n")
fprint(lines[0]) fprint(lines[0])
for _, l := range lines[1:] { for _, l := range lines[1:] {
@ -483,12 +554,12 @@ func formatDefinition(out io.Writer, targetWidth, namesWidth int, pad, name stri
func formatASTGroup(out io.Writer, g formatGroup) error { func formatASTGroup(out io.Writer, g formatGroup) error {
if g.items[0].commentFormat == standaloneComment { if g.items[0].commentFormat == standaloneComment {
return formatComment(out, g.items[0].node) return formatComment(out, g.items[0].node, false)
} }
hasHeaderComment := g.items[0].commentFormat == headerComment hasHeaderComment := g.items[0].commentFormat == headerComment
if hasHeaderComment { if hasHeaderComment {
if err := formatComment(out, g.items[0].node); err != nil { if err := formatComment(out, g.items[0].node, false); err != nil {
return err return err
} }
@ -504,7 +575,22 @@ func formatASTGroup(out io.Writer, g formatGroup) error {
return err return err
} }
formatComment(out, item.node) var buf bytes.Buffer
if err := formatComment(&buf, item.node, true); err != nil {
return err
}
lines := strings.Split(buf.String(), "\n")
if _, err := fmt.Fprint(out, lines[0]); err != nil {
return err
}
for _, l := range lines[1:] {
if _, err := fmt.Fprintf(out, "\n %s%s", pad, l); err != nil {
return err
}
}
continue continue
} }
@ -553,11 +639,12 @@ func formatAST(out io.Writer, ast *Node) error {
// //
// inline comment: // inline comment:
// - it's inside a definition // - it's inside a definition
// - if it's before the eq sign, discard name padding and use block comment // - if it's outside of the expression, only one, we don't know if it's before or after the eq sign,
// - if it's in an expression, and falls on its own line, and fits on the previous line, put it there // treat it as after
// - if it's in an expression, and falls on its own line, use a line comment // - if it's outside of the expression, put it below the name and the eq sign unindented, and put the
// - if it's in an expression, and it's followed by non-comment on the same line, use block comment // expression below the comment or two comments, indented
// - if it consists of multiple lines, append a new line below the definition // - if it's in the expression, always wrap the expression into lines, and put the comment on its own
// line
groups := groupASTByComments(ast) groups := groupASTByComments(ast)
for i, g := range groups { for i, g := range groups {

View File

@ -226,34 +226,138 @@ func TestChoiceFormat(t *testing.T) {
const testDoc = `/* const testDoc = `/*
foo foo
*/ */
// bar
// bar
//baz
/* foo
bar baz */// foo bar baz
wschar:alias =// foo
/* bar */ " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v";
wsc:ws = wschar | comment;
block-comment:alias:nows /* foo */ // bar
= "/*" ("*" [^/] | [^*])* "*/";
line-comment:alias:nows /*
foo
*/ = "//" [^\n]*;
comment-segment:alias:nows = // bar
line-comment | block-comment;
ws-no-nl:alias:nows = " " | "\t" | "\b" | /* this one */ /* is a */ "\f" /* form feed */ // for sure
| "\r" | "\v";
comment:nows = comment-segment /* segment is not the best name */ /* but */ (ws-no-nl* "\n"? ws-no-nl* // fine
comment-segment)*;
any-char = "."; // equivalent to [^]
// caution: newline is accepted
/* class not */ class-not = "^";
class-char:nows = [^\\\[\]\^\-] | "\\" . /* foo
bar */;
char-range:nows = class-char "-" class-char // foo
;
char-class:nows = "[" class-not? (class-char | char-range)* "]"; // foo
/* bar
baz */
// newline is accepted
sequence-char:nows = [^\\"] | "\\" .;
char-sequence:nows = "\"" sequence-char* "\"";
terminal:alias = any-char | char-class | char-sequence;
symbol:nows = [^\\ \n\t\b\f\r\v/.\[\]\"{}\^+*?|():=;]+;
group:alias = "(" expression ")";
number:alias:nows = [0-9]+;
count = number;
count-quantifier = "{" count "}";
range-from = number;
range-to = number;
range-quantifier = "{" range-from? "," range-to? "}";
one-or-more = "+";
zero-or-more = "*";
zero-or-one = "?";
quantity:alias = count-quantifier
| range-quantifier
| one-or-more
| zero-or-more
| zero-or-one;
item:nows = (terminal | symbol | group) quantity?;
sequence = item+;
option:alias = terminal | symbol | group | sequence;
// DOC: how the order matters
choice = option ("|" option)+;
// DOC: not having 'not' needs some tricks sometimes
expression:alias = terminal | symbol | group | sequence | choice;
alias = "alias";
ws = "ws";
nows = "nows";
kw = "kw";
nokw = "nokw";
failpass = "failpass";
root = "root";
flag:alias = alias | ws | nows | kw | nokw | failpass | root;
definition-name:alias:nows = symbol (":" flag)*;
definition = definition-name "=" expression;
definitions:alias = definition (";"+ definition)*;
syntax:root = ";"* definitions? ";"*;
`
const testDocCheck = `/*
foo
*/
// bar // bar
// bar // bar
// baz // baz
/* foo /* foo
bar baz */ // foo bar baz bar baz */ // foo bar baz
wschar:alias = " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v"; wschar:alias = // foo
/* bar */
" " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v";
wsc:ws = wschar | comment; wsc:ws = wschar | comment;
block-comment:alias:nows = "/*" ("*" [^/] | [^*])* "*/"; block-comment:alias:nows = /* foo */ // bar
line-comment:alias:nows = "//" [^\n]*; "/*" ("*" [^/] | [^*])* "*/";
comment-segment:alias:nows = line-comment | block-comment; line-comment:alias:nows = /*
foo
*/
"//" [^\n]*;
comment-segment:alias:nows = // bar
line-comment | block-comment;
ws-no-nl:alias:nows = " " ws-no-nl:alias:nows = " "
| "\t" | "\t"
| "\b" | "\b"
/* this one */ /* is a */ /* this one */ /* is a */
| "\f" | "\f"
/* form feed */ /* for sure */ /* form feed */ // for sure
| "\r" | "\r"
| "\v"; | "\v";
comment:nows = comment-segment comment:nows = comment-segment
/* segment is not the best name */ /* but */ /* segment is not the best name */ /* but */
(ws-no-nl* "\n"? ws-no-nl* /* fine */ comment-segment)*; ( ws-no-nl*
"\n"?
ws-no-nl*
// fine
comment-segment
)*;
any-char = "."; // equivalent to [^] any-char = "."; // equivalent to [^]
// caution: newline is accepted // caution: newline is accepted
/* class not */
class-not = "^"; class-not = "^";
class-char:nows = [^\\\[\]\^\-] | "\\" .; class-char:nows = [^\\\[\]\^\-] | "\\" .; /* foo
char-range:nows = class-char "-" class-char; bar */
char-class:nows = "[" class-not? (class-char | char-range)* "]"; char-range:nows = class-char "-" class-char; // foo
char-class:nows = "[" class-not? (class-char | char-range)* "]"; // foo
/* bar
baz */
// newline is accepted // newline is accepted
sequence-char:nows = [^\\"] | "\\" .; sequence-char:nows = [^\\"] | "\\" .;
@ -295,20 +399,32 @@ definitions:alias = definition (";"+ definition)*;
syntax:root = ";"* definitions? ";"*;` syntax:root = ";"* definitions? ";"*;`
func TestDocFormat(t *testing.T) { func TestDocFormat(t *testing.T) {
in := bytes.NewBufferString(testDoc) for _, test := range []struct{ title, in, out string }{{
s := &Syntax{} title: "format",
if err := s.ReadSyntax(in); err != nil { in: testDoc,
t.Fatal(err) out: testDocCheck,
} }, {
title: "check",
in: testDocCheck,
out: testDocCheck,
}} {
t.Run(test.title, func(t *testing.T) {
in := bytes.NewBufferString(test.in)
s := &Syntax{}
if err := s.ReadSyntax(in); err != nil {
t.Fatal(err)
}
out := bytes.NewBuffer(nil) out := bytes.NewBuffer(nil)
if err := s.Format(out); err != nil { if err := s.Format(out); err != nil {
t.Fatal(err) t.Fatal(err)
} }
if out.String() != testDoc { if out.String() != test.out {
t.Log(testDoc) t.Log(test.out)
t.Log(out.String()) t.Log(out.String())
t.Fatal() t.Fatal()
}
})
} }
} }

View File

@ -6,21 +6,28 @@
//baz //baz
/* foo /* foo
bar baz */// foo bar baz bar baz */// foo bar baz
wschar:alias = " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v"; wschar:alias =// foo
/* bar */ " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v";
wsc:ws = wschar | comment; wsc:ws = wschar | comment;
block-comment:alias:nows = "/*" ("*" [^/] | [^*])* "*/"; block-comment:alias:nows /* foo */ // bar
line-comment:alias:nows = "//" [^\n]*; = "/*" ("*" [^/] | [^*])* "*/";
comment-segment:alias:nows = line-comment | block-comment; line-comment:alias:nows /*
foo
*/ = "//" [^\n]*;
comment-segment:alias:nows = // bar
line-comment | block-comment;
ws-no-nl:alias:nows = " " | "\t" | "\b" | /* this one */ /* is a */ "\f" /* form feed */ /* for sure */ | "\r" | "\v"; ws-no-nl:alias:nows = " " | "\t" | "\b" | /* this one */ /* is a */ "\f" /* form feed */ /* for sure */ | "\r" | "\v";
comment:nows = comment-segment /* segment is not the best name */ /* but */ (ws-no-nl* "\n"? ws-no-nl* /* fine */ comment-segment)*; comment:nows = comment-segment /* segment is not the best name */ /* but */ (ws-no-nl* "\n"? ws-no-nl* /* fine */ comment-segment)*;
any-char = "."; // equivalent to [^] any-char = "."; // equivalent to [^]
// caution: newline is accepted // caution: newline is accepted
class-not = "^"; /* class not */ class-not = "^";
class-char:nows = [^\\\[\]\^\-] | "\\" .; class-char:nows = [^\\\[\]\^\-] | "\\" . /* foo
char-range:nows = class-char "-" class-char; bar */;
char-range:nows = class-char "-" class-char // foo
;
char-class:nows = "[" class-not? (class-char | char-range)* "]"; char-class:nows = "[" class-not? (class-char | char-range)* "]";
// newline is accepted // newline is accepted
@ -58,22 +65,18 @@ choice = option ("|" option)+;
// DOC: not having 'not' needs some tricks sometimes // DOC: not having 'not' needs some tricks sometimes
expression:alias = terminal expression:alias = terminal | symbol | group | sequence | choice;
| symbol
| group
| sequence
| choice;
alias = "alias"; alias = "alias";
ws = "ws"; ws = "ws";
nows = "nows"; nows = "nows";
kw = "kw"; kw = "kw";
nokw = "nokw"; nokw = "nokw";
failpass = "failpass"; failpass = "failpass";
root = "root"; root = "root";
flag:alias = alias | ws | nows | kw | nokw | failpass | root; flag:alias = alias | ws | nows | kw | nokw | failpass | root;
definition-name:alias:nows = symbol (":" flag)*; definition-name:alias:nows = symbol (":" flag)*;
definition = definition-name "=" expression; definition = definition-name "=" expression;
definitions:alias = definition (";"+ definition)*; definitions:alias = definition (";"+ definition)*;
syntax:root = ";"* definitions? ";"*; syntax:root = ";"* definitions? ";"*;