1
0

fix comment handling in formatting

This commit is contained in:
Arpad Ryszka 2026-06-01 20:28:39 +02:00
parent d9f1c70d99
commit b8036802f6
3 changed files with 270 additions and 64 deletions

137
format.go
View File

@ -104,13 +104,24 @@ func groupASTByComments(ast *Node) []formatGroup {
return groups
}
func trimComment(text string) string {
var inBlockComment, inLineComment bool
func trimComment(text string, withBlockBody bool) string {
var inBlockComment, inLineComment, newBlockLine bool
tr := []rune(text)
rr := make([]rune, 0, len(tr))
for i := 0; i < len(tr); i++ {
r := tr[i]
if inBlockComment {
if withBlockBody && r == '\n' {
rr = append(rr, '\n')
newBlockLine = true
continue
}
if withBlockBody && newBlockLine && unicode.IsSpace(r) {
continue
}
newBlockLine = false
if r != '*' || len(tr) <= i+1 || tr[i+1] != '/' {
rr = append(rr, r)
continue
@ -163,9 +174,9 @@ func trimComment(text string) string {
return strings.Join(lines, "\n")
}
func formatComment(out io.Writer, n *Node) error {
func formatComment(out io.Writer, n *Node, withBlockBody bool) error {
text := n.Text()
text = trimComment(text)
text = trimComment(text, withBlockBody)
_, err := fmt.Fprint(out, text)
return err
}
@ -174,6 +185,10 @@ func formatDefinitionName(item formatItem) string {
name := item.node.Nodes[0].Text()
flags := make([]string, 0, len(item.node.Nodes)-2)
for i := 1; i < len(item.node.Nodes)-1; i++ {
if item.node.Nodes[i].Name == "comment" {
continue
}
flags = append(flags, item.node.Nodes[i].Name)
}
@ -343,7 +358,7 @@ func formatSequenceItemNodes(out io.Writer, targetWidth int, n []*Node) error {
}
if ni.Name == "comment" {
if err := formatComment(out, ni); err != nil {
if err := formatComment(out, ni, true); err != nil {
return err
}
@ -359,6 +374,18 @@ func formatSequenceItemNodes(out io.Writer, targetWidth int, n []*Node) error {
}
func formatSequence(out io.Writer, targetWidth int, n []*Node) error {
var hasComments bool
for _, ni := range n {
if ni.Name == "comment" {
hasComments = true
break
}
}
if hasComments {
return formatSequenceItemNodes(out, 0, n)
}
var buf bytes.Buffer
if err := formatSequenceItemNodes(&buf, -1, n); err != nil {
return err
@ -389,7 +416,7 @@ func formatChoiceOptionNodes(out io.Writer, targetWidth int, n []*Node) error {
}
}
if err := formatComment(out, ni); err != nil {
if err := formatComment(out, ni, true); err != nil {
return err
}
@ -411,6 +438,18 @@ func formatChoiceOptionNodes(out io.Writer, targetWidth int, n []*Node) error {
}
func formatChoice(out io.Writer, targetWidth int, n []*Node) error {
var hasComments bool
for _, ni := range n {
if ni.Name == "comment" {
hasComments = true
break
}
}
if hasComments {
return formatChoiceOptionNodes(out, 0, n)
}
var buf bytes.Buffer
if err := formatChoiceOptionNodes(&buf, -1, n); err != nil {
return err
@ -431,7 +470,7 @@ func formatExpression(out io.Writer, targetWidth int, n *Node) error {
var err error
switch n.Name {
case "comment":
err = formatComment(out, n)
err = formatComment(out, n, true)
case "any-char":
err = formatAnyChar(out)
case "char-class":
@ -450,17 +489,12 @@ func formatExpression(out io.Writer, targetWidth int, n *Node) error {
}
func formatDefinition(out io.Writer, targetWidth, namesWidth int, pad, name string, n *Node) error {
if _, err := fmt.Fprintf(out, "%s%s = ", name, pad[:namesWidth-len(name)]); err != nil {
return err
}
var (
buf bytes.Buffer
foundComment bool
err error
)
var buf bytes.Buffer
targetWidth = decTargetWidth(targetWidth, namesWidth+3)
if err := formatExpression(&buf, targetWidth, n.Nodes[len(n.Nodes)-1]); err != nil {
return err
}
var err error
fprint := func(a ...any) {
if err != nil {
return
@ -469,6 +503,43 @@ func formatDefinition(out io.Writer, targetWidth, namesWidth int, pad, name stri
_, err = fmt.Fprint(out, a...)
}
fprintBuf := func(a ...any) {
if err != nil {
return
}
_, err = fmt.Fprint(&buf, a...)
}
fprint(name, pad[:namesWidth-len(name)], " = ")
for _, n := range n.Nodes {
if n.Name != "comment" {
continue
}
if foundComment {
fprintBuf("\n")
}
foundComment = true
if err := formatComment(&buf, n, true); err != nil {
return err
}
}
if foundComment {
lines := strings.Split(buf.String(), "\n")
(&buf).Reset()
for _, l := range lines {
fprint(l, "\n", pad, " ")
}
}
targetWidth = decTargetWidth(targetWidth, namesWidth+3)
if err := formatExpression(&buf, targetWidth, n.Nodes[len(n.Nodes)-1]); err != nil {
return err
}
lines := strings.Split(buf.String(), "\n")
fprint(lines[0])
for _, l := range lines[1:] {
@ -483,12 +554,12 @@ func formatDefinition(out io.Writer, targetWidth, namesWidth int, pad, name stri
func formatASTGroup(out io.Writer, g formatGroup) error {
if g.items[0].commentFormat == standaloneComment {
return formatComment(out, g.items[0].node)
return formatComment(out, g.items[0].node, false)
}
hasHeaderComment := g.items[0].commentFormat == headerComment
if hasHeaderComment {
if err := formatComment(out, g.items[0].node); err != nil {
if err := formatComment(out, g.items[0].node, false); err != nil {
return err
}
@ -504,7 +575,22 @@ func formatASTGroup(out io.Writer, g formatGroup) error {
return err
}
formatComment(out, item.node)
var buf bytes.Buffer
if err := formatComment(&buf, item.node, true); err != nil {
return err
}
lines := strings.Split(buf.String(), "\n")
if _, err := fmt.Fprint(out, lines[0]); err != nil {
return err
}
for _, l := range lines[1:] {
if _, err := fmt.Fprintf(out, "\n %s%s", pad, l); err != nil {
return err
}
}
continue
}
@ -553,11 +639,12 @@ func formatAST(out io.Writer, ast *Node) error {
//
// inline comment:
// - it's inside a definition
// - if it's before the eq sign, discard name padding and use block comment
// - if it's in an expression, and falls on its own line, and fits on the previous line, put it there
// - if it's in an expression, and falls on its own line, use a line comment
// - if it's in an expression, and it's followed by non-comment on the same line, use block comment
// - if it consists of multiple lines, append a new line below the definition
// - if it's outside of the expression, only one, we don't know if it's before or after the eq sign,
// treat it as after
// - if it's outside of the expression, put it below the name and the eq sign unindented, and put the
// expression below the comment or two comments, indented
// - if it's in the expression, always wrap the expression into lines, and put the comment on its own
// line
groups := groupASTByComments(ast)
for i, g := range groups {

View File

@ -226,34 +226,138 @@ func TestChoiceFormat(t *testing.T) {
const testDoc = `/*
foo
*/
// bar
// bar
//baz
/* foo
bar baz */// foo bar baz
wschar:alias =// foo
/* bar */ " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v";
wsc:ws = wschar | comment;
block-comment:alias:nows /* foo */ // bar
= "/*" ("*" [^/] | [^*])* "*/";
line-comment:alias:nows /*
foo
*/ = "//" [^\n]*;
comment-segment:alias:nows = // bar
line-comment | block-comment;
ws-no-nl:alias:nows = " " | "\t" | "\b" | /* this one */ /* is a */ "\f" /* form feed */ // for sure
| "\r" | "\v";
comment:nows = comment-segment /* segment is not the best name */ /* but */ (ws-no-nl* "\n"? ws-no-nl* // fine
comment-segment)*;
any-char = "."; // equivalent to [^]
// caution: newline is accepted
/* class not */ class-not = "^";
class-char:nows = [^\\\[\]\^\-] | "\\" . /* foo
bar */;
char-range:nows = class-char "-" class-char // foo
;
char-class:nows = "[" class-not? (class-char | char-range)* "]"; // foo
/* bar
baz */
// newline is accepted
sequence-char:nows = [^\\"] | "\\" .;
char-sequence:nows = "\"" sequence-char* "\"";
terminal:alias = any-char | char-class | char-sequence;
symbol:nows = [^\\ \n\t\b\f\r\v/.\[\]\"{}\^+*?|():=;]+;
group:alias = "(" expression ")";
number:alias:nows = [0-9]+;
count = number;
count-quantifier = "{" count "}";
range-from = number;
range-to = number;
range-quantifier = "{" range-from? "," range-to? "}";
one-or-more = "+";
zero-or-more = "*";
zero-or-one = "?";
quantity:alias = count-quantifier
| range-quantifier
| one-or-more
| zero-or-more
| zero-or-one;
item:nows = (terminal | symbol | group) quantity?;
sequence = item+;
option:alias = terminal | symbol | group | sequence;
// DOC: how the order matters
choice = option ("|" option)+;
// DOC: not having 'not' needs some tricks sometimes
expression:alias = terminal | symbol | group | sequence | choice;
alias = "alias";
ws = "ws";
nows = "nows";
kw = "kw";
nokw = "nokw";
failpass = "failpass";
root = "root";
flag:alias = alias | ws | nows | kw | nokw | failpass | root;
definition-name:alias:nows = symbol (":" flag)*;
definition = definition-name "=" expression;
definitions:alias = definition (";"+ definition)*;
syntax:root = ";"* definitions? ";"*;
`
const testDocCheck = `/*
foo
*/
// bar
// bar
// baz
/* foo
bar baz */ // foo bar baz
wschar:alias = " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v";
wschar:alias = // foo
/* bar */
" " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v";
wsc:ws = wschar | comment;
block-comment:alias:nows = "/*" ("*" [^/] | [^*])* "*/";
line-comment:alias:nows = "//" [^\n]*;
comment-segment:alias:nows = line-comment | block-comment;
block-comment:alias:nows = /* foo */ // bar
"/*" ("*" [^/] | [^*])* "*/";
line-comment:alias:nows = /*
foo
*/
"//" [^\n]*;
comment-segment:alias:nows = // bar
line-comment | block-comment;
ws-no-nl:alias:nows = " "
| "\t"
| "\b"
/* this one */ /* is a */
| "\f"
/* form feed */ /* for sure */
/* form feed */ // for sure
| "\r"
| "\v";
comment:nows = comment-segment
/* segment is not the best name */ /* but */
(ws-no-nl* "\n"? ws-no-nl* /* fine */ comment-segment)*;
( ws-no-nl*
"\n"?
ws-no-nl*
// fine
comment-segment
)*;
any-char = "."; // equivalent to [^]
// caution: newline is accepted
/* class not */
class-not = "^";
class-char:nows = [^\\\[\]\^\-] | "\\" .;
char-range:nows = class-char "-" class-char;
char-class:nows = "[" class-not? (class-char | char-range)* "]";
class-char:nows = [^\\\[\]\^\-] | "\\" .; /* foo
bar */
char-range:nows = class-char "-" class-char; // foo
char-class:nows = "[" class-not? (class-char | char-range)* "]"; // foo
/* bar
baz */
// newline is accepted
sequence-char:nows = [^\\"] | "\\" .;
@ -295,7 +399,17 @@ definitions:alias = definition (";"+ definition)*;
syntax:root = ";"* definitions? ";"*;`
func TestDocFormat(t *testing.T) {
in := bytes.NewBufferString(testDoc)
for _, test := range []struct{ title, in, out string }{{
title: "format",
in: testDoc,
out: testDocCheck,
}, {
title: "check",
in: testDocCheck,
out: testDocCheck,
}} {
t.Run(test.title, func(t *testing.T) {
in := bytes.NewBufferString(test.in)
s := &Syntax{}
if err := s.ReadSyntax(in); err != nil {
t.Fatal(err)
@ -306,9 +420,11 @@ func TestDocFormat(t *testing.T) {
t.Fatal(err)
}
if out.String() != testDoc {
t.Log(testDoc)
if out.String() != test.out {
t.Log(test.out)
t.Log(out.String())
t.Fatal()
}
})
}
}

View File

@ -6,21 +6,28 @@
//baz
/* foo
bar baz */// foo bar baz
wschar:alias = " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v";
wschar:alias =// foo
/* bar */ " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v";
wsc:ws = wschar | comment;
block-comment:alias:nows = "/*" ("*" [^/] | [^*])* "*/";
line-comment:alias:nows = "//" [^\n]*;
comment-segment:alias:nows = line-comment | block-comment;
block-comment:alias:nows /* foo */ // bar
= "/*" ("*" [^/] | [^*])* "*/";
line-comment:alias:nows /*
foo
*/ = "//" [^\n]*;
comment-segment:alias:nows = // bar
line-comment | block-comment;
ws-no-nl:alias:nows = " " | "\t" | "\b" | /* this one */ /* is a */ "\f" /* form feed */ /* for sure */ | "\r" | "\v";
comment:nows = comment-segment /* segment is not the best name */ /* but */ (ws-no-nl* "\n"? ws-no-nl* /* fine */ comment-segment)*;
any-char = "."; // equivalent to [^]
// caution: newline is accepted
class-not = "^";
class-char:nows = [^\\\[\]\^\-] | "\\" .;
char-range:nows = class-char "-" class-char;
/* class not */ class-not = "^";
class-char:nows = [^\\\[\]\^\-] | "\\" . /* foo
bar */;
char-range:nows = class-char "-" class-char // foo
;
char-class:nows = "[" class-not? (class-char | char-range)* "]";
// newline is accepted
@ -58,11 +65,7 @@ choice = option ("|" option)+;
// DOC: not having 'not' needs some tricks sometimes
expression:alias = terminal
| symbol
| group
| sequence
| choice;
expression:alias = terminal | symbol | group | sequence | choice;
alias = "alias";
ws = "ws";