From b8036802f6f1e39c357aac9d2f7f134d30228e7a Mon Sep 17 00:00:00 2001 From: Arpad Ryszka Date: Mon, 1 Jun 2026 20:28:39 +0200 Subject: [PATCH] fix comment handling in formatting --- format.go | 137 +++++++++++++++++++++++++++++++++-------- format_test.go | 160 +++++++++++++++++++++++++++++++++++++++++------- syntax.treerack | 37 ++++++----- 3 files changed, 270 insertions(+), 64 deletions(-) diff --git a/format.go b/format.go index 74e4e7c..b943696 100644 --- a/format.go +++ b/format.go @@ -104,13 +104,24 @@ func groupASTByComments(ast *Node) []formatGroup { return groups } -func trimComment(text string) string { - var inBlockComment, inLineComment bool +func trimComment(text string, withBlockBody bool) string { + var inBlockComment, inLineComment, newBlockLine bool tr := []rune(text) rr := make([]rune, 0, len(tr)) for i := 0; i < len(tr); i++ { r := tr[i] if inBlockComment { + if withBlockBody && r == '\n' { + rr = append(rr, '\n') + newBlockLine = true + continue + } + + if withBlockBody && newBlockLine && unicode.IsSpace(r) { + continue + } + + newBlockLine = false if r != '*' || len(tr) <= i+1 || tr[i+1] != '/' { rr = append(rr, r) continue @@ -163,9 +174,9 @@ func trimComment(text string) string { return strings.Join(lines, "\n") } -func formatComment(out io.Writer, n *Node) error { +func formatComment(out io.Writer, n *Node, withBlockBody bool) error { text := n.Text() - text = trimComment(text) + text = trimComment(text, withBlockBody) _, err := fmt.Fprint(out, text) return err } @@ -174,6 +185,10 @@ func formatDefinitionName(item formatItem) string { name := item.node.Nodes[0].Text() flags := make([]string, 0, len(item.node.Nodes)-2) for i := 1; i < len(item.node.Nodes)-1; i++ { + if item.node.Nodes[i].Name == "comment" { + continue + } + flags = append(flags, item.node.Nodes[i].Name) } @@ -343,7 +358,7 @@ func formatSequenceItemNodes(out io.Writer, targetWidth int, n []*Node) error { } if ni.Name == "comment" { - if err := formatComment(out, ni); err != 
nil { + if err := formatComment(out, ni, true); err != nil { return err } @@ -359,6 +374,18 @@ func formatSequenceItemNodes(out io.Writer, targetWidth int, n []*Node) error { } func formatSequence(out io.Writer, targetWidth int, n []*Node) error { + var hasComments bool + for _, ni := range n { + if ni.Name == "comment" { + hasComments = true + break + } + } + + if hasComments { + return formatSequenceItemNodes(out, 0, n) + } + var buf bytes.Buffer if err := formatSequenceItemNodes(&buf, -1, n); err != nil { return err @@ -389,7 +416,7 @@ func formatChoiceOptionNodes(out io.Writer, targetWidth int, n []*Node) error { } } - if err := formatComment(out, ni); err != nil { + if err := formatComment(out, ni, true); err != nil { return err } @@ -411,6 +438,18 @@ func formatChoiceOptionNodes(out io.Writer, targetWidth int, n []*Node) error { } func formatChoice(out io.Writer, targetWidth int, n []*Node) error { + var hasComments bool + for _, ni := range n { + if ni.Name == "comment" { + hasComments = true + break + } + } + + if hasComments { + return formatChoiceOptionNodes(out, 0, n) + } + var buf bytes.Buffer if err := formatChoiceOptionNodes(&buf, -1, n); err != nil { return err @@ -431,7 +470,7 @@ func formatExpression(out io.Writer, targetWidth int, n *Node) error { var err error switch n.Name { case "comment": - err = formatComment(out, n) + err = formatComment(out, n, true) case "any-char": err = formatAnyChar(out) case "char-class": @@ -450,17 +489,12 @@ func formatExpression(out io.Writer, targetWidth int, n *Node) error { } func formatDefinition(out io.Writer, targetWidth, namesWidth int, pad, name string, n *Node) error { - if _, err := fmt.Fprintf(out, "%s%s = ", name, pad[:namesWidth-len(name)]); err != nil { - return err - } + var ( + buf bytes.Buffer + foundComment bool + err error + ) - var buf bytes.Buffer - targetWidth = decTargetWidth(targetWidth, namesWidth+3) - if err := formatExpression(&buf, targetWidth, n.Nodes[len(n.Nodes)-1]); err != nil { - 
return err - } - - var err error fprint := func(a ...any) { if err != nil { return @@ -469,6 +503,43 @@ func formatDefinition(out io.Writer, targetWidth, namesWidth int, pad, name stri _, err = fmt.Fprint(out, a...) } + fprintBuf := func(a ...any) { + if err != nil { + return + } + + _, err = fmt.Fprint(&buf, a...) + } + + fprint(name, pad[:namesWidth-len(name)], " = ") + for _, n := range n.Nodes { + if n.Name != "comment" { + continue + } + + if foundComment { + fprintBuf("\n") + } + + foundComment = true + if err := formatComment(&buf, n, true); err != nil { + return err + } + } + + if foundComment { + lines := strings.Split(buf.String(), "\n") + (&buf).Reset() + for _, l := range lines { + fprint(l, "\n", pad, " ") + } + } + + targetWidth = decTargetWidth(targetWidth, namesWidth+3) + if err := formatExpression(&buf, targetWidth, n.Nodes[len(n.Nodes)-1]); err != nil { + return err + } + lines := strings.Split(buf.String(), "\n") fprint(lines[0]) for _, l := range lines[1:] { @@ -483,12 +554,12 @@ func formatDefinition(out io.Writer, targetWidth, namesWidth int, pad, name stri func formatASTGroup(out io.Writer, g formatGroup) error { if g.items[0].commentFormat == standaloneComment { - return formatComment(out, g.items[0].node) + return formatComment(out, g.items[0].node, false) } hasHeaderComment := g.items[0].commentFormat == headerComment if hasHeaderComment { - if err := formatComment(out, g.items[0].node); err != nil { + if err := formatComment(out, g.items[0].node, false); err != nil { return err } @@ -504,7 +575,22 @@ func formatASTGroup(out io.Writer, g formatGroup) error { return err } - formatComment(out, item.node) + var buf bytes.Buffer + if err := formatComment(&buf, item.node, true); err != nil { + return err + } + + lines := strings.Split(buf.String(), "\n") + if _, err := fmt.Fprint(out, lines[0]); err != nil { + return err + } + + for _, l := range lines[1:] { + if _, err := fmt.Fprintf(out, "\n %s%s", pad, l); err != nil { + return err + } + } + 
continue } @@ -553,11 +639,12 @@ func formatAST(out io.Writer, ast *Node) error { // // inline comment: // - it's inside a definition - // - if it's before the eq sign, discard name padding and use block comment - // - if it's in an expression, and falls on its own line, and fits on the previous line, put it there - // - if it's in an expression, and falls on its own line, use a line comment - // - if it's in an expression, and it's followed by non-comment on the same line, use block comment - // - if it consists of multiple lines, append a new line below the definition + // - if it's outside of the expression, only one, we don't know if it's before or after the eq sign, + // treat it as after + // - if it's outside of the expression, put it below the name and the eq sign unindented, and put the + // expression below the comment or two comments, indented + // - if it's in the expression, always wrap the expression into lines, and put the comment on its own + // line groups := groupASTByComments(ast) for i, g := range groups { diff --git a/format_test.go b/format_test.go index 430bf32..ec0ba36 100644 --- a/format_test.go +++ b/format_test.go @@ -226,34 +226,138 @@ func TestChoiceFormat(t *testing.T) { const testDoc = `/* foo */ +// bar + // bar + //baz + /* foo + bar baz */// foo bar baz +wschar:alias =// foo + /* bar */ " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v"; +wsc:ws = wschar | comment; + +block-comment:alias:nows /* foo */ // bar += "/*" ("*" [^/] | [^*])* "*/"; +line-comment:alias:nows /* +foo +*/ = "//" [^\n]*; +comment-segment:alias:nows = // bar +line-comment | block-comment; +ws-no-nl:alias:nows = " " | "\t" | "\b" | /* this one */ /* is a */ "\f" /* form feed */ // for sure +| "\r" | "\v"; +comment:nows = comment-segment /* segment is not the best name */ /* but */ (ws-no-nl* "\n"? 
ws-no-nl* // fine +comment-segment)*; + +any-char = "."; // equivalent to [^] + +// caution: newline is accepted +/* class not */ class-not = "^"; +class-char:nows = [^\\\[\]\^\-] | "\\" . /* foo +bar */; +char-range:nows = class-char "-" class-char // foo +; +char-class:nows = "[" class-not? (class-char | char-range)* "]"; // foo +/* bar +baz */ + +// newline is accepted +sequence-char:nows = [^\\"] | "\\" .; +char-sequence:nows = "\"" sequence-char* "\""; + +terminal:alias = any-char | char-class | char-sequence; + +symbol:nows = [^\\ \n\t\b\f\r\v/.\[\]\"{}\^+*?|():=;]+; + +group:alias = "(" expression ")"; + +number:alias:nows = [0-9]+; +count = number; +count-quantifier = "{" count "}"; +range-from = number; +range-to = number; +range-quantifier = "{" range-from? "," range-to? "}"; +one-or-more = "+"; +zero-or-more = "*"; +zero-or-one = "?"; +quantity:alias = count-quantifier + | range-quantifier + | one-or-more + | zero-or-more + | zero-or-one; + +item:nows = (terminal | symbol | group) quantity?; +sequence = item+; + +option:alias = terminal | symbol | group | sequence; + +// DOC: how the order matters +choice = option ("|" option)+; + +// DOC: not having 'not' needs some tricks sometimes + +expression:alias = terminal | symbol | group | sequence | choice; + +alias = "alias"; +ws = "ws"; +nows = "nows"; +kw = "kw"; +nokw = "nokw"; +failpass = "failpass"; +root = "root"; +flag:alias = alias | ws | nows | kw | nokw | failpass | root; +definition-name:alias:nows = symbol (":" flag)*; +definition = definition-name "=" expression; + +definitions:alias = definition (";"+ definition)*; +syntax:root = ";"* definitions? 
";"*; +` + +const testDocCheck = `/* + foo +*/ // bar // bar // baz /* foo bar baz */ // foo bar baz -wschar:alias = " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v"; +wschar:alias = // foo + /* bar */ + " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v"; wsc:ws = wschar | comment; -block-comment:alias:nows = "/*" ("*" [^/] | [^*])* "*/"; -line-comment:alias:nows = "//" [^\n]*; -comment-segment:alias:nows = line-comment | block-comment; +block-comment:alias:nows = /* foo */ // bar + "/*" ("*" [^/] | [^*])* "*/"; +line-comment:alias:nows = /* + foo + */ + "//" [^\n]*; +comment-segment:alias:nows = // bar + line-comment | block-comment; ws-no-nl:alias:nows = " " | "\t" | "\b" /* this one */ /* is a */ | "\f" - /* form feed */ /* for sure */ + /* form feed */ // for sure | "\r" | "\v"; comment:nows = comment-segment /* segment is not the best name */ /* but */ - (ws-no-nl* "\n"? ws-no-nl* /* fine */ comment-segment)*; + ( ws-no-nl* + "\n"? + ws-no-nl* + // fine + comment-segment + )*; any-char = "."; // equivalent to [^] // caution: newline is accepted +/* class not */ class-not = "^"; -class-char:nows = [^\\\[\]\^\-] | "\\" .; -char-range:nows = class-char "-" class-char; -char-class:nows = "[" class-not? (class-char | char-range)* "]"; +class-char:nows = [^\\\[\]\^\-] | "\\" .; /* foo + bar */ +char-range:nows = class-char "-" class-char; // foo +char-class:nows = "[" class-not? (class-char | char-range)* "]"; // foo + /* bar + baz */ // newline is accepted sequence-char:nows = [^\\"] | "\\" .; @@ -295,20 +399,32 @@ definitions:alias = definition (";"+ definition)*; syntax:root = ";"* definitions? 
";"*;` func TestDocFormat(t *testing.T) { - in := bytes.NewBufferString(testDoc) - s := &Syntax{} - if err := s.ReadSyntax(in); err != nil { - t.Fatal(err) - } + for _, test := range []struct{ title, in, out string }{{ + title: "format", + in: testDoc, + out: testDocCheck, + }, { + title: "check", + in: testDocCheck, + out: testDocCheck, + }} { + t.Run(test.title, func(t *testing.T) { + in := bytes.NewBufferString(test.in) + s := &Syntax{} + if err := s.ReadSyntax(in); err != nil { + t.Fatal(err) + } - out := bytes.NewBuffer(nil) - if err := s.Format(out); err != nil { - t.Fatal(err) - } + out := bytes.NewBuffer(nil) + if err := s.Format(out); err != nil { + t.Fatal(err) + } - if out.String() != testDoc { - t.Log(testDoc) - t.Log(out.String()) - t.Fatal() + if out.String() != test.out { + t.Log(test.out) + t.Log(out.String()) + t.Fatal() + } + }) } } diff --git a/syntax.treerack b/syntax.treerack index 6e25955..cc2df3f 100644 --- a/syntax.treerack +++ b/syntax.treerack @@ -6,21 +6,28 @@ //baz /* foo bar baz */// foo bar baz -wschar:alias = " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v"; +wschar:alias =// foo + /* bar */ " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v"; wsc:ws = wschar | comment; -block-comment:alias:nows = "/*" ("*" [^/] | [^*])* "*/"; -line-comment:alias:nows = "//" [^\n]*; -comment-segment:alias:nows = line-comment | block-comment; +block-comment:alias:nows /* foo */ // bar += "/*" ("*" [^/] | [^*])* "*/"; +line-comment:alias:nows /* +foo +*/ = "//" [^\n]*; +comment-segment:alias:nows = // bar +line-comment | block-comment; ws-no-nl:alias:nows = " " | "\t" | "\b" | /* this one */ /* is a */ "\f" /* form feed */ /* for sure */ | "\r" | "\v"; comment:nows = comment-segment /* segment is not the best name */ /* but */ (ws-no-nl* "\n"? 
ws-no-nl* /* fine */ comment-segment)*; any-char = "."; // equivalent to [^] // caution: newline is accepted -class-not = "^"; -class-char:nows = [^\\\[\]\^\-] | "\\" .; -char-range:nows = class-char "-" class-char; +/* class not */ class-not = "^"; +class-char:nows = [^\\\[\]\^\-] | "\\" . /* foo +bar */; +char-range:nows = class-char "-" class-char // foo +; char-class:nows = "[" class-not? (class-char | char-range)* "]"; // newline is accepted @@ -58,22 +65,18 @@ choice = option ("|" option)+; // DOC: not having 'not' needs some tricks sometimes -expression:alias = terminal - | symbol - | group - | sequence - | choice; +expression:alias = terminal | symbol | group | sequence | choice; -alias = "alias"; +alias = "alias"; ws = "ws"; -nows = "nows"; +nows = "nows"; kw = "kw"; nokw = "nokw"; -failpass = "failpass"; +failpass = "failpass"; root = "root"; -flag:alias = alias | ws | nows | kw | nokw | failpass | root; +flag:alias = alias | ws | nows | kw | nokw | failpass | root; definition-name:alias:nows = symbol (":" flag)*; -definition = definition-name "=" expression; +definition = definition-name "=" expression; definitions:alias = definition (";"+ definition)*; syntax:root = ";"* definitions? ";"*;