package treerack

import (
	"bytes"
	"fmt"
	"testing"
)

// formatDefinitionTestItem describes a single definition-formatting case.
//
// title names the subtest. When definition is non-empty it is wrapped as
// "def = <definition>" and used as the syntax source; otherwise syntax is
// used verbatim and must itself define "def". output is the text that
// formatting the "def" definition is expected to produce.
type formatDefinitionTestItem struct {
	title      string
	definition string
	syntax     string
	output     string
}

// testDefinitionFormatItem builds the subtest function for one test item.
//
// The returned function parses the item's syntax source with the supplied
// treerack syntax, applies the resulting nodes to a fresh Syntax via define,
// looks up the definition registered under "def", formats it with
// formatOptions{mode: f} and compares the result with test.output.
//
// NOTE(review): the outer t parameter is not used; the returned closure
// declares its own t, which shadows it.
func testDefinitionFormatItem(t *testing.T, treerack *Syntax, f formatFlags, test formatDefinitionTestItem) func(t *testing.T) {
	return func(t *testing.T) {
		// A bare definition takes precedence over a full syntax document.
		syntax := test.syntax
		if test.definition != "" {
			syntax = fmt.Sprintf("def = %s", test.definition)
		}

		nodes, err := treerack.Parse(bytes.NewBufferString(syntax))
		if err != nil {
			t.Fatal(err)
		}

		s := &Syntax{}
		if err := define(s, nodes); err != nil {
			t.Fatal(err)
		}

		// Every case is expected to register a definition named "def".
		def, ok := s.registry.definition["def"]
		if !ok {
			t.Fatal("failed to register definition")
		}

		output := def.format(s.registry, formatOptions{mode: f})
		if output != test.output {
			t.Error("invalid definition format")
			t.Log("got:     ", output)
			t.Log("expected:", test.output)
		}
	}
}

// testDefinitionFormat obtains a syntax from bootSyntax once and runs every
// item in tests as a named subtest using the format flags f. The test is
// aborted if bootSyntax returns an error.
func testDefinitionFormat(t *testing.T, f formatFlags, tests []formatDefinitionTestItem) {
	treerack, err := bootSyntax()
	if err != nil {
		t.Fatal(err)
	}

	for _, test := range tests {
		t.Run(test.title, testDefinitionFormatItem(t, treerack, f, test))
	}
}

// TestCharFormat checks the formatted form of character-class and any-char
// definitions with formatNone. Per the expectations below, a class holding a
// single character is expected to be rendered as a quoted string.
func TestCharFormat(t *testing.T) {
	testDefinitionFormat(t, formatNone, []formatDefinitionTestItem{{
		title:      "empty",
		definition: "[]",
		output:     "[]",
	}, {
		title:      "one char",
		definition: "[a]",
		output:     `"a"`,
	}, {
		title:      "escaped char",
		definition: "[\\a]",
		output:     `"a"`,
	}, {
		title:      "escaped control char",
		definition: "[\\^]",
		output:     `"^"`,
	}, {
		title:      "escaped whitespace char",
		definition: "[\\n]",
		output:     `"\n"`,
	}, {
		// Here the class contains a literal newline byte rather than the
		// two-character escape sequence.
		title:      "escaped verbatim whitespace char",
		definition: "[\n]",
		output:     `"\n"`,
	}, {
		title:      "escaped range",
		definition: "[\\b-\\v]",
		output:     "[\\b-\\v]",
	}, {
		title:      "anything",
		definition: ".",
		output:     ".",
	}, {
		title:      "not something",
		definition: "[^abc]",
		output:     "[^abc]",
	}, {
		title:      "range",
		definition: "[a-z]",
		output:     "[a-z]",
	}, {
		// The expected output lists the single characters before the ranges.
		title:      "range and char mixed",
		definition: "[a-z_\\-A-Z]",
		output:     "[_\\-a-zA-Z]",
	}})
}

// TestSequenceFormat checks the formatted form of sequence definitions with
// formatNone: character sequences, quantifiers, symbol references, and
// grouped items.
func TestSequenceFormat(t *testing.T) {
	testDefinitionFormat(t, formatNone, []formatDefinitionTestItem{{
		title:  "empty char sequence",
		syntax: `def = ""`,
		output: `""`,
	}, {
		title:  "char sequence",
		syntax: `def = "abc"`,
		output: `"abc"`,
	}, {
		title:  "char sequence, escaped",
		syntax: `def = "\\n"`,
		output: `"\\n"`,
	}, {
		title:  "chars",
		syntax: `def = "abc" [a-z]`,
		output: `"abc" [a-z]`,
	}, {
		title:  "quantifiers, 0-or-more, single char",
		syntax: `def = "a"*`,
		output: `"a"*`,
	}, {
		title:  "quantifiers, 0-or-more",
		syntax: `def = "abc"*`,
		output: `"abc"*`,
	}, {
		title:  "quantifiers, 1-or-more, single char",
		syntax: `def = "a"+`,
		output: `"a"+`,
	}, {
		title:  "quantifiers, 1-or-more",
		syntax: `def = "abc"+`,
		output: `"abc"+`,
	}, {
		title:  "quantifiers, 0-or-one, single char",
		syntax: `def = "a"?`,
		output: `"a"?`,
	}, {
		title:  "quantifiers, 0-or-one",
		syntax: `def = "abc"?`,
		output: `"abc"?`,
	}, {
		title:  "quantifiers, exact number, single char",
		syntax: `def = "a"{3}`,
		output: `"a"{3}`,
	}, {
		title:  "quantifiers, exact number",
		syntax: `def = "abc"{3}`,
		output: `"abc"{3}`,
	}, {
		// An explicit zero lower bound is expected to be omitted, and the
		// space after the comma removed, in the formatted range.
		title:  "quantifiers, max, single char",
		syntax: `def = "a"{0, 3}`,
		output: `"a"{,3}`,
	}, {
		title:  "quantifiers, max",
		syntax: `def = "abc"{0, 3}`,
		output: `"abc"{,3}`,
	}, {
		title:  "quantifiers, min, single char",
		syntax: `def = "a"{3,}`,
		output: `"a"{3,}`,
	}, {
		title:  "quantifiers, min",
		syntax: `def = "abc"{3,}`,
		output: `"abc"{3,}`,
	}, {
		title:  "quantifiers, range, single char",
		syntax: `def = "a"{3, 9}`,
		output: `"a"{3,9}`,
	}, {
		title:  "quantifiers, range",
		syntax: `def = "abc"{3, 9}`,
		output: `"abc"{3,9}`,
	}, {
		title:  "symbols",
		syntax: `a = "a"; b = "b"; c = "c"; def = a b c`,
		output: "a b c",
	}, {
		title:  "choice in sequence, single char",
		syntax: `def = "a" ("b" | "c")`,
		output: `"a" ("b" | "c")`,
	}, {
		title:  "choice in sequence",
		syntax: `def = "abc" ("def" | "ghi")`,
		output: `"abc" ("def" | "ghi")`,
	}, {
		title:  "grouped quantifier, single char",
		syntax: `def = ("a" "b"){3}`,
		output: `("a" "b"){3}`,
	}, {
		title:  "grouped quantifier",
		syntax: `def = ("abc" "def"){3}`,
		output: `("abc" "def"){3}`,
	}})
}

// TestChoiceFormat checks the formatted form of choice definitions with
// formatNone: choices of character sequences, of inline sequences, and of
// symbol references.
func TestChoiceFormat(t *testing.T) {
	testDefinitionFormat(t, formatNone, []formatDefinitionTestItem{{
		title:  "choice of char sequences, single char",
		syntax: `def = "a" | "b" | "c"`,
		output: `"a" | "b" | "c"`,
	}, {
		title:  "choice of char sequences",
		syntax: `def = "abc" | "def" | "ghi"`,
		output: `"abc" | "def" | "ghi"`,
	}, {
		title:  "choice of inline sequences, single char",
		syntax: `def = "a" "b" | "c" "d" | "e" "f"`,
		output: `"a" "b" | "c" "d" | "e" "f"`,
	}, {
		title:  "choice of inline sequences",
		syntax: `def = "abc" "def" | "ghi" "jkl" | "mno" "pqr"`,
		output: `"abc" "def" | "ghi" "jkl" | "mno" "pqr"`,
	}, {
		title:  "choice of symbol",
		syntax: `a = "a"; b = "b"; c = "c"; def = a | b | c`,
		output: "a | b | c",
	}})
}

// testDoc is the unformatted input document used by the "format" case of
// TestDocFormat. It is a syntax definition with comments placed in a variety
// of positions.
//
// NOTE(review): this literal is a raw string, so its whitespace is part of
// the fixture and must not be reflowed. In this copy of the file almost all
// of it sits on one physical line; if line breaks were collapsed upstream, a
// `//` comment inside the grammar would extend to the next remaining line
// break — verify against the canonical fixture before relying on it.
const testDoc = `/* foo */ // bar // bar //baz /* foo bar baz */// foo bar baz wschar:alias =// foo /* bar */ " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v"; wsc:ws = wschar | comment; block-comment:alias:nows /* foo */ // bar = "/*" ("*" [^/] | [^*])* "*/"; line-comment:alias:nows /* foo */ = "//" [^\n]*; comment-segment:alias:nows = // bar line-comment | block-comment; ws-no-nl:alias:nows = " " | "\t" | "\b" | /* this one */ /* is a */ "\f" /* form feed */ // for sure | "\r" | "\v"; comment:nows = comment-segment /* segment is not the best name */ /* but */ (ws-no-nl* "\n"? ws-no-nl* // fine comment-segment)*; any-char = "."; // equivalent to [^] // caution: newline is accepted /* class not */ class-not = "^"; class-char:nows = [^\\\[\]\^\-] | "\\" . /* foo bar */; char-range:nows = class-char "-" class-char // foo ; char-class:nows = "[" class-not? 
(class-char | char-range)* "]"; // foo /* bar baz */ // newline is accepted sequence-char:nows = [^\\"] | "\\" .; char-sequence:nows = "\"" sequence-char* "\""; terminal:alias = any-char | char-class | char-sequence; symbol:nows = [^\\ \n\t\b\f\r\v/.\[\]\"{}\^+*?|():=;]+; group:alias = "(" expression ")"; number:alias:nows = [0-9]+; count = number; count-quantifier = "{" count "}"; range-from = number; range-to = number; range-quantifier = "{" range-from? "," range-to? "}"; one-or-more = "+"; zero-or-more = "*"; zero-or-one = "?"; quantity:alias = count-quantifier | range-quantifier | one-or-more | zero-or-more | zero-or-one; item:nows = (terminal | symbol | group) quantity?; sequence = item+; option:alias = terminal | symbol | group | sequence; // DOC: how the order matters choice = option ("|" option)+; // DOC: not having 'not' needs some tricks sometimes expression:alias = terminal | symbol | group | sequence | choice; alias = "alias"; ws = "ws"; nows = "nows"; kw = "kw"; nokw = "nokw"; failpass = "failpass"; root = "root"; flag:alias = alias | ws | nows | kw | nokw | failpass | root; definition-name:alias:nows = symbol (":" flag)*; definition = definition-name "=" expression; definitions:alias = definition (";"+ definition)*; syntax:root = ";"* definitions? ";"*; `

// testDocCheck is the expected result of formatting testDoc. TestDocFormat
// also feeds it back in as input and expects it to come out unchanged.
//
// NOTE(review): raw-string fixture; the same whitespace caveat as for
// testDoc applies — verify against the canonical fixture.
const testDocCheck = `/* foo */ // bar // bar // baz /* foo bar baz */ // foo bar baz wschar:alias = // foo /* bar */ " " | "\t" | "\n" | "\b" | "\f" | "\r" | "\v"; wsc:ws = wschar | comment; block-comment:alias:nows = /* foo */ // bar "/*" ("*" [^/] | [^*])* "*/"; line-comment:alias:nows = /* foo */ "//" [^\n]*; comment-segment:alias:nows = // bar line-comment | block-comment; ws-no-nl:alias:nows = " " | "\t" | "\b" /* this one */ /* is a */ | "\f" /* form feed */ // for sure | "\r" | "\v"; comment:nows = comment-segment /* segment is not the best name */ /* but */ ( ws-no-nl* "\n"? 
ws-no-nl* // fine comment-segment )*; any-char = "."; // equivalent to [^] // caution: newline is accepted /* class not */ class-not = "^"; class-char:nows = [^\\\[\]\^\-] | "\\" .; /* foo bar */ char-range:nows = class-char "-" class-char; // foo char-class:nows = "[" class-not? (class-char | char-range)* "]"; // foo /* bar baz */ // newline is accepted sequence-char:nows = [^\\"] | "\\" .; char-sequence:nows = "\"" sequence-char* "\""; terminal:alias = any-char | char-class | char-sequence; symbol:nows = [^\\ \n\t\b\f\r\v/.\[\]\"{}\^+*?|():=;]+; group:alias = "(" expression ")"; number:alias:nows = [0-9]+; count = number; count-quantifier = "{" count "}"; range-from = number; range-to = number; range-quantifier = "{" range-from? "," range-to? "}"; one-or-more = "+"; zero-or-more = "*"; zero-or-one = "?"; quantity:alias = count-quantifier | range-quantifier | one-or-more | zero-or-more | zero-or-one; item:nows = (terminal | symbol | group) quantity?; sequence = item+; option:alias = terminal | symbol | group | sequence; // DOC: how the order matters choice = option ("|" option)+; // DOC: not having 'not' needs some tricks sometimes expression:alias = terminal | symbol | group | sequence | choice; alias = "alias"; ws = "ws"; nows = "nows"; kw = "kw"; nokw = "nokw"; failpass = "failpass"; root = "root"; flag:alias = alias | ws | nows | kw | nokw | failpass | root; definition-name:alias:nows = symbol (":" flag)*; definition = definition-name "=" expression; definitions:alias = definition (";"+ definition)*; syntax:root = ";"* definitions? ";"*; `

// testDocURL is the unformatted URL syntax document used by the
// "format url" case of TestDocFormat.
//
// NOTE(review): raw-string fixture; its whitespace is significant and may
// have been collapsed in this copy — verify against the canonical fixture.
const testDocURL = `// basd on RFC3986 and RFC6874 // char types: digit:alias:failpass = [0-9]; hex:alias:failpass = [0-9a-fA-F]; alpha:alias:failpass = [a-zA-Z]; delimiter:alias:failpass = ":" | "/" | "?" | "#" | "[" | "]" | "@"; subdelimiter:alias:failpass = "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "="; unreserved:alias:failpass = alpha | digit | "-" | "." 
| "_" | "~"; reserved:alias:failpass = delimiter | subdelimiter; percent-encoded:alias:failpass = "%" hex{2}; path-char:alias:failpass = unreserved | percent-encoded | subdelimiter | ":" | "@"; // scheme: scheme = alpha (alpha | digit | [+-.])*; // userinfo: userinfo = (unreserved | percent-encoded | subdelimiter | ":")*; // IPv4: dec-byte:alias:failpass = digit | [1-9] digit | "1" digit{2} | "2" [0-4] digit | "25" [0-5]; ipv4:failpass = dec-byte ("." dec-byte){3}; // IPv6: h16:alias:failpass = hex{1,4}; ls32:alias:failpass = (h16 ":" h16) | ipv4; ipv6:failpass = (h16 ":"){6} ls32 | "::" (h16 ":"){5} ls32 | h16? "::" (h16 ":"){4} ls32 | ((h16 ":")? h16)? "::" (h16 ":"){3} ls32 | ((h16 ":"){,2} h16)? "::" (h16 ":"){2} ls32 | ((h16 ":"){,3} h16)? "::" h16 ":" ls32 | ((h16 ":"){,4} h16)? "::" ls32 | ((h16 ":"){,5} h16)? "::" h16 | ((h16 ":"){,6} h16)? "::" ; zone-id:alias:failpass = (unreserved | percent-encoded)+; ipv6-zone:failpass = ipv6 "%25" zone-id; // RFC6874 // host: registry-name-rfc:failpass = (unreserved | percent-encoded | subdelimiter)*; // all RFC chars allowed dns-label:alias:failpass = (alpha | digit) ("-"* (alpha | digit)+)*; domain-name:failpass = dns-label ("." 
dns-label)* "."?; // DNS compatible hostname-rfc = ipv4 | "[" (ipv6 | ipv6-zone) "]" | registry-name-rfc; hostname = ipv4 | "[" (ipv6 | ipv6-zone) "]" | domain-name; port = digit*; host-rfc:alias:failpass = hostname-rfc (":" port)?; host:alias:failpass = hostname (":" port)?; // path: segment:alias:failpass = path-char*; segment-non-zero:alias:failpass = path-char+; segment-non-zero-no-colon:alias:failpass = (unreserved | percent-encoded | subdelimiter | "@")+; path-absolute-or-empty = ("/" segment)*; path-absolute = "/" (segment-non-zero ("/" segment)*)?; path-rootless = segment-non-zero ("/" segment)*; path-noscheme = segment-non-zero-no-colon ("/" segment)*; path-empty = ""; query = (path-char | "/" | "?")*; fragment = (path-char | "/" | "?")*; // composed together: authority-rfc:alias:failpass = (userinfo "@")? host-rfc; authority:alias:failpass = (userinfo "@")? host; hierarchy-part-rfc:alias:failpass = "//" authority-rfc path-absolute-or-empty | path-absolute | path-rootless | path-empty; hierarchy-part:alias:failpass = "//" authority path-absolute-or-empty | path-absolute | path-rootless | path-empty; relative-part-rfc:alias:failpass = "//" authority-rfc path-absolute-or-empty | path-absolute | path-noscheme | path-empty; relative-part:alias:failpass = "//" authority path-absolute-or-empty | path-absolute | path-noscheme | path-empty; absolute-url-rfc = scheme ":" hierarchy-part-rfc ("?" query)? ("#" fragment)?; absolute-url = scheme ":" hierarchy-part ("?" query)? ("#" fragment)?; relative-url-rfc = relative-part-rfc ("?" query)? ("#" fragment)?; relative-url = relative-part ("?" query)? ("#" fragment)?; // supporting four possible types URLs: url:root = absolute-url | absolute-url-rfc | relative-url | relative-url-rfc; `

// testDocCheckURL is the expected result of formatting testDocURL.
// TestDocFormat also feeds it back in as input and expects it to come out
// unchanged. Compared with testDocURL, the ls32 definition appears here
// without the parentheses around `h16 ":" h16`, and the ipv6 definition has
// no space before its terminating semicolon.
//
// NOTE(review): raw-string fixture; its whitespace is significant and may
// have been collapsed in this copy — verify against the canonical fixture.
const testDocCheckURL = `// basd on RFC3986 and RFC6874 // char types: digit:alias:failpass = [0-9]; hex:alias:failpass = [0-9a-fA-F]; alpha:alias:failpass = [a-zA-Z]; delimiter:alias:failpass = ":" | "/" | "?" 
| "#" | "[" | "]" | "@"; subdelimiter:alias:failpass = "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "="; unreserved:alias:failpass = alpha | digit | "-" | "." | "_" | "~"; reserved:alias:failpass = delimiter | subdelimiter; percent-encoded:alias:failpass = "%" hex{2}; path-char:alias:failpass = unreserved | percent-encoded | subdelimiter | ":" | "@"; // scheme: scheme = alpha (alpha | digit | [+-.])*; // userinfo: userinfo = (unreserved | percent-encoded | subdelimiter | ":")*; // IPv4: dec-byte:alias:failpass = digit | [1-9] digit | "1" digit{2} | "2" [0-4] digit | "25" [0-5]; ipv4:failpass = dec-byte ("." dec-byte){3}; // IPv6: h16:alias:failpass = hex{1,4}; ls32:alias:failpass = h16 ":" h16 | ipv4; ipv6:failpass = (h16 ":"){6} ls32 | "::" (h16 ":"){5} ls32 | h16? "::" (h16 ":"){4} ls32 | ((h16 ":")? h16)? "::" (h16 ":"){3} ls32 | ((h16 ":"){,2} h16)? "::" (h16 ":"){2} ls32 | ((h16 ":"){,3} h16)? "::" h16 ":" ls32 | ((h16 ":"){,4} h16)? "::" ls32 | ((h16 ":"){,5} h16)? "::" h16 | ((h16 ":"){,6} h16)? "::"; zone-id:alias:failpass = (unreserved | percent-encoded)+; ipv6-zone:failpass = ipv6 "%25" zone-id; // RFC6874 // host: registry-name-rfc:failpass = (unreserved | percent-encoded | subdelimiter)*; // all RFC chars allowed dns-label:alias:failpass = (alpha | digit) ("-"* (alpha | digit)+)*; domain-name:failpass = dns-label ("." 
dns-label)* "."?; // DNS compatible hostname-rfc = ipv4 | "[" (ipv6 | ipv6-zone) "]" | registry-name-rfc; hostname = ipv4 | "[" (ipv6 | ipv6-zone) "]" | domain-name; port = digit*; host-rfc:alias:failpass = hostname-rfc (":" port)?; host:alias:failpass = hostname (":" port)?; // path: segment:alias:failpass = path-char*; segment-non-zero:alias:failpass = path-char+; segment-non-zero-no-colon:alias:failpass = (unreserved | percent-encoded | subdelimiter | "@")+; path-absolute-or-empty = ("/" segment)*; path-absolute = "/" (segment-non-zero ("/" segment)*)?; path-rootless = segment-non-zero ("/" segment)*; path-noscheme = segment-non-zero-no-colon ("/" segment)*; path-empty = ""; query = (path-char | "/" | "?")*; fragment = (path-char | "/" | "?")*; // composed together: authority-rfc:alias:failpass = (userinfo "@")? host-rfc; authority:alias:failpass = (userinfo "@")? host; hierarchy-part-rfc:alias:failpass = "//" authority-rfc path-absolute-or-empty | path-absolute | path-rootless | path-empty; hierarchy-part:alias:failpass = "//" authority path-absolute-or-empty | path-absolute | path-rootless | path-empty; relative-part-rfc:alias:failpass = "//" authority-rfc path-absolute-or-empty | path-absolute | path-noscheme | path-empty; relative-part:alias:failpass = "//" authority path-absolute-or-empty | path-absolute | path-noscheme | path-empty; absolute-url-rfc = scheme ":" hierarchy-part-rfc ("?" query)? ("#" fragment)?; absolute-url = scheme ":" hierarchy-part ("?" query)? ("#" fragment)?; relative-url-rfc = relative-part-rfc ("?" query)? ("#" fragment)?; relative-url = relative-part ("?" query)? 
("#" fragment)?; // supporting four possible types URLs: url:root = absolute-url | absolute-url-rfc | relative-url | relative-url-rfc; `

// TestDocFormat reads whole syntax documents with Syntax.ReadSyntax, writes
// them back with Syntax.Format and compares the result with the expected
// text. Each fixture pair is used twice: once formatting the raw input, and
// once feeding the expected text back in, where the output is expected to
// equal the input.
func TestDocFormat(t *testing.T) {
	for _, test := range []struct{ title, in, out string }{{
		title: "format",
		in:    testDoc,
		out:   testDocCheck,
	}, {
		title: "check",
		in:    testDocCheck,
		out:   testDocCheck,
	}, {
		title: "format url",
		in:    testDocURL,
		out:   testDocCheckURL,
	}, {
		title: "check url",
		in:    testDocCheckURL,
		out:   testDocCheckURL,
	}} {
		t.Run(test.title, func(t *testing.T) {
			in := bytes.NewBufferString(test.in)
			s := &Syntax{}
			if err := s.ReadSyntax(in); err != nil {
				t.Fatal(err)
			}

			out := bytes.NewBuffer(nil)
			if err := s.Format(out); err != nil {
				t.Fatal(err)
			}

			if out.String() != test.out {
				// Logged in order: the expected text first, then the
				// actual output.
				t.Log(test.out)
				t.Log(out.String())
				t.Fatal()
			}
		})
	}
}

// TestInProcessSyntaxFormat builds a syntax through the Class, Choice and
// Sequence methods instead of reading it from text, then checks the output
// of Syntax.Format against the expected document.
//
// NOTE(review): whatever the Class/Choice/Sequence calls return is discarded
// here — confirm whether they report errors that this test should check.
func TestInProcessSyntaxFormat(t *testing.T) {
	s := &Syntax{}
	s.Class("whitespace-chars", Alias, false, []rune{' ', '\t', '\r', '\n'}, nil)
	s.Choice("whitespace", Whitespace, "whitespace-chars")
	s.Class("digit", Alias, false, nil, [][]rune{{'0', '9'}})
	s.Sequence("number", NoWhitespace, SequenceItem{Name: "digit", Min: 1})
	s.Class("operator", None, false, []rune{'+', '-'}, nil)
	s.Sequence(
		"expression",
		Root,
		SequenceItem{Name: "number"},
		SequenceItem{Name: "operator"},
		SequenceItem{Name: "number"},
	)

	var out bytes.Buffer
	if err := s.Format(&out); err != nil {
		t.Fatal(err)
	}

	// NOTE(review): raw-string fixture; its whitespace is significant and may
	// have been collapsed in this copy — verify against the canonical file.
	const expect = `whitespace-chars:alias = [ \t\r\n]; whitespace:ws = whitespace-chars; digit:alias = [0-9]; number:nows = digit+; operator = [+\-]; expression:root = number operator number; `

	if out.String() != expect {
		t.Fatal(out.String())
	}
}