diff --git a/doc/example/url.treerack b/doc/example/url.treerack new file mode 100644 index 0000000..5ac1d8a --- /dev/null +++ b/doc/example/url.treerack @@ -0,0 +1,86 @@ +// based on RFC3986 and RFC6874 + +// char types: +digit:alias:failpass = [0-9]; +hex:alias:failpass = [0-9a-fA-F]; +alpha:alias:failpass = [a-zA-Z]; +delimiter:alias:failpass = ":" | "/" | "?" | "#" | "[" | "]" | "@"; +subdelimiter:alias:failpass = "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "="; +unreserved:alias:failpass = alpha | digit | "-" | "." | "_" | "~"; +reserved:alias:failpass = delimiter | subdelimiter; +percent-encoded:alias:failpass = "%" hex{2}; +path-char:alias:failpass = unreserved | percent-encoded | subdelimiter | ":" | "@"; + +// scheme: +scheme = alpha (alpha | digit | "+" | "-" | ".")*; // RFC3986 3.1: only "+", "-" and "." besides alphanumerics; a [+-.] class would be a range that also admits "," + +// userinfo: +userinfo = (unreserved | percent-encoded | subdelimiter | ":")*; + +// IPv4: +dec-byte:alias:failpass = digit | [1-9] digit | "1" digit{2} | "2" [0-4] digit | "25" [0-5]; +ipv4:failpass = dec-byte ("." dec-byte){3}; + +// IPv6: +h16:alias:failpass = hex{1,4}; +ls32:alias:failpass = h16 ":" h16 | ipv4; +ipv6:failpass = (h16 ":"){6} ls32 + | "::" (h16 ":"){5} ls32 + | h16? "::" (h16 ":"){4} ls32 + | ((h16 ":")? h16)? "::" (h16 ":"){3} ls32 + | ((h16 ":"){,2} h16)? "::" (h16 ":"){2} ls32 + | ((h16 ":"){,3} h16)? "::" h16 ":" ls32 + | ((h16 ":"){,4} h16)? "::" ls32 + | ((h16 ":"){,5} h16)? "::" h16 + | ((h16 ":"){,6} h16)? "::"; +zone-id:alias:failpass = (unreserved | percent-encoded)+; +ipv6-zone:failpass = ipv6 "%25" zone-id; // RFC6874 + +// host: +registry-name-rfc:failpass = (unreserved | percent-encoded | subdelimiter)*; // all RFC chars allowed +dns-label:alias:failpass = (alpha | digit) ("-"* (alpha | digit)+)*; +domain-name:failpass = dns-label ("." 
dns-label)* "."?; // DNS compatible +hostname-rfc = ipv4 | "[" (ipv6 | ipv6-zone) "]" | registry-name-rfc; +hostname = ipv4 | "[" (ipv6 | ipv6-zone) "]" | domain-name; +port = digit*; +host-rfc:alias:failpass = hostname-rfc (":" port)?; +host:alias:failpass = hostname (":" port)?; + +// path: +segment:alias:failpass = path-char*; +segment-non-zero:alias:failpass = path-char+; +segment-non-zero-no-colon:alias:failpass = (unreserved | percent-encoded | subdelimiter | "@")+; +path-absolute-or-empty = ("/" segment)*; +path-absolute = "/" (segment-non-zero ("/" segment)*)?; +path-rootless = segment-non-zero ("/" segment)*; +path-noscheme = segment-non-zero-no-colon ("/" segment)*; +path-empty = ""; +query = (path-char | "/" | "?")*; +fragment = (path-char | "/" | "?")*; + +// composed together: +authority-rfc:alias:failpass = (userinfo "@")? host-rfc; +authority:alias:failpass = (userinfo "@")? host; +hierarchy-part-rfc:alias:failpass = "//" authority-rfc path-absolute-or-empty + | path-absolute + | path-rootless + | path-empty; +hierarchy-part:alias:failpass = "//" authority path-absolute-or-empty + | path-absolute + | path-rootless + | path-empty; +relative-part-rfc:alias:failpass = "//" authority-rfc path-absolute-or-empty + | path-absolute + | path-noscheme + | path-empty; +relative-part:alias:failpass = "//" authority path-absolute-or-empty + | path-absolute + | path-noscheme + | path-empty; +absolute-url-rfc = scheme ":" hierarchy-part-rfc ("?" query)? ("#" fragment)?; +absolute-url = scheme ":" hierarchy-part ("?" query)? ("#" fragment)?; +relative-url-rfc = relative-part-rfc ("?" query)? ("#" fragment)?; +relative-url = relative-part ("?" query)? 
("#" fragment)?; + +// supporting four possible types of URLs: +url:root = absolute-url | absolute-url-rfc | relative-url | relative-url-rfc; diff --git a/doc/manual.md b/doc/manual.md index 2a208bc..5cbcbaa 100644 --- a/doc/manual.md +++ b/doc/manual.md @@ -624,6 +624,6 @@ following workflow: - treerack command help: [../cmd/treerack/readme.md](../cmd/treerack/readme.md) or, if the command is installed, `man treerack`, or `path/to/treerack help` - the arithmetic calculator example: [./example/acalc](./example/acalc). -- additional example: [./example](./example) +- additional examples: [./example](./example) Happy parsing! diff --git a/doc/syntax.md b/doc/syntax.md index fb8592e..1f5d7ee 100644 --- a/doc/syntax.md +++ b/doc/syntax.md @@ -115,5 +115,6 @@ Comments follow C-style syntax and are ignored by the definition parser. ## Examples - [JSON](example/json.treerack) +- [URL](example/url.treerack) - [Scheme](example/scheme.treerack) - [Treerack (itself)](../syntax.treerack) diff --git a/format_test.go b/format_test.go index 8be6eac..5999d52 100644 --- a/format_test.go +++ b/format_test.go @@ -399,6 +399,170 @@ definitions:alias = definition (";"+ definition)*; syntax:root = ";"* definitions? ";"*; ` +const testDocURL = `// basd on RFC3986 and RFC6874 + +// char types: +digit:alias:failpass = [0-9]; +hex:alias:failpass = [0-9a-fA-F]; +alpha:alias:failpass = [a-zA-Z]; +delimiter:alias:failpass = ":" | "/" | "?" | "#" | "[" | "]" | "@"; +subdelimiter:alias:failpass = "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "="; +unreserved:alias:failpass = alpha | digit | "-" | "." 
| "_" | "~"; +reserved:alias:failpass = delimiter | subdelimiter; +percent-encoded:alias:failpass = "%" hex{2}; +path-char:alias:failpass = unreserved | percent-encoded | subdelimiter | ":" | "@"; + +// scheme: +scheme = alpha (alpha | digit | [+-.])*; + +// userinfo: +userinfo = (unreserved | percent-encoded | subdelimiter | ":")*; + +// IPv4: +dec-byte:alias:failpass = digit | [1-9] digit | "1" digit{2} | "2" [0-4] digit | "25" [0-5]; +ipv4:failpass = dec-byte ("." dec-byte){3}; + +// IPv6: +h16:alias:failpass = hex{1,4}; +ls32:alias:failpass = (h16 ":" h16) | ipv4; +ipv6:failpass = (h16 ":"){6} ls32 + | "::" (h16 ":"){5} ls32 + | h16? "::" (h16 ":"){4} ls32 + | ((h16 ":")? h16)? "::" (h16 ":"){3} ls32 + | ((h16 ":"){,2} h16)? "::" (h16 ":"){2} ls32 + | ((h16 ":"){,3} h16)? "::" h16 ":" ls32 + | ((h16 ":"){,4} h16)? "::" ls32 + | ((h16 ":"){,5} h16)? "::" h16 + | ((h16 ":"){,6} h16)? "::" ; +zone-id:alias:failpass = (unreserved | percent-encoded)+; +ipv6-zone:failpass = ipv6 "%25" zone-id; // RFC6874 + +// host: +registry-name-rfc:failpass = (unreserved | percent-encoded | subdelimiter)*; // all RFC chars allowed +dns-label:alias:failpass = (alpha | digit) ("-"* (alpha | digit)+)*; +domain-name:failpass = dns-label ("." 
dns-label)* "."?; // DNS compatible +hostname-rfc = ipv4 | "[" (ipv6 | ipv6-zone) "]" | registry-name-rfc; +hostname = ipv4 | "[" (ipv6 | ipv6-zone) "]" | domain-name; +port = digit*; +host-rfc:alias:failpass = hostname-rfc (":" port)?; +host:alias:failpass = hostname (":" port)?; + +// path: +segment:alias:failpass = path-char*; +segment-non-zero:alias:failpass = path-char+; +segment-non-zero-no-colon:alias:failpass = (unreserved | percent-encoded | subdelimiter | "@")+; +path-absolute-or-empty = ("/" segment)*; +path-absolute = "/" (segment-non-zero ("/" segment)*)?; +path-rootless = segment-non-zero ("/" segment)*; +path-noscheme = segment-non-zero-no-colon ("/" segment)*; +path-empty = ""; +query = (path-char | "/" | "?")*; +fragment = (path-char | "/" | "?")*; + +// composed together: +authority-rfc:alias:failpass = (userinfo "@")? host-rfc; +authority:alias:failpass = (userinfo "@")? host; +hierarchy-part-rfc:alias:failpass = "//" authority-rfc path-absolute-or-empty | path-absolute | path-rootless | path-empty; +hierarchy-part:alias:failpass = "//" authority path-absolute-or-empty | path-absolute | path-rootless | path-empty; +relative-part-rfc:alias:failpass = "//" authority-rfc path-absolute-or-empty | path-absolute | path-noscheme | path-empty; +relative-part:alias:failpass = "//" authority path-absolute-or-empty | path-absolute | path-noscheme | path-empty; +absolute-url-rfc = scheme ":" hierarchy-part-rfc ("?" query)? ("#" fragment)?; +absolute-url = scheme ":" hierarchy-part ("?" query)? ("#" fragment)?; +relative-url-rfc = relative-part-rfc ("?" query)? ("#" fragment)?; +relative-url = relative-part ("?" query)? 
("#" fragment)?; + +// supporting four possible types URLs: +url:root = absolute-url | absolute-url-rfc | relative-url | relative-url-rfc; +` + +const testDocCheckURL = `// basd on RFC3986 and RFC6874 + +// char types: +digit:alias:failpass = [0-9]; +hex:alias:failpass = [0-9a-fA-F]; +alpha:alias:failpass = [a-zA-Z]; +delimiter:alias:failpass = ":" | "/" | "?" | "#" | "[" | "]" | "@"; +subdelimiter:alias:failpass = "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "="; +unreserved:alias:failpass = alpha | digit | "-" | "." | "_" | "~"; +reserved:alias:failpass = delimiter | subdelimiter; +percent-encoded:alias:failpass = "%" hex{2}; +path-char:alias:failpass = unreserved | percent-encoded | subdelimiter | ":" | "@"; + +// scheme: +scheme = alpha (alpha | digit | [+-.])*; + +// userinfo: +userinfo = (unreserved | percent-encoded | subdelimiter | ":")*; + +// IPv4: +dec-byte:alias:failpass = digit | [1-9] digit | "1" digit{2} | "2" [0-4] digit | "25" [0-5]; +ipv4:failpass = dec-byte ("." dec-byte){3}; + +// IPv6: +h16:alias:failpass = hex{1,4}; +ls32:alias:failpass = h16 ":" h16 | ipv4; +ipv6:failpass = (h16 ":"){6} ls32 + | "::" (h16 ":"){5} ls32 + | h16? "::" (h16 ":"){4} ls32 + | ((h16 ":")? h16)? "::" (h16 ":"){3} ls32 + | ((h16 ":"){,2} h16)? "::" (h16 ":"){2} ls32 + | ((h16 ":"){,3} h16)? "::" h16 ":" ls32 + | ((h16 ":"){,4} h16)? "::" ls32 + | ((h16 ":"){,5} h16)? "::" h16 + | ((h16 ":"){,6} h16)? "::"; +zone-id:alias:failpass = (unreserved | percent-encoded)+; +ipv6-zone:failpass = ipv6 "%25" zone-id; // RFC6874 + +// host: +registry-name-rfc:failpass = (unreserved | percent-encoded | subdelimiter)*; // all RFC chars allowed +dns-label:alias:failpass = (alpha | digit) ("-"* (alpha | digit)+)*; +domain-name:failpass = dns-label ("." 
dns-label)* "."?; // DNS compatible +hostname-rfc = ipv4 | "[" (ipv6 | ipv6-zone) "]" | registry-name-rfc; +hostname = ipv4 | "[" (ipv6 | ipv6-zone) "]" | domain-name; +port = digit*; +host-rfc:alias:failpass = hostname-rfc (":" port)?; +host:alias:failpass = hostname (":" port)?; + +// path: +segment:alias:failpass = path-char*; +segment-non-zero:alias:failpass = path-char+; +segment-non-zero-no-colon:alias:failpass = (unreserved | percent-encoded | subdelimiter | "@")+; +path-absolute-or-empty = ("/" segment)*; +path-absolute = "/" (segment-non-zero ("/" segment)*)?; +path-rootless = segment-non-zero ("/" segment)*; +path-noscheme = segment-non-zero-no-colon ("/" segment)*; +path-empty = ""; +query = (path-char | "/" | "?")*; +fragment = (path-char | "/" | "?")*; + +// composed together: +authority-rfc:alias:failpass = (userinfo "@")? host-rfc; +authority:alias:failpass = (userinfo "@")? host; +hierarchy-part-rfc:alias:failpass = "//" authority-rfc path-absolute-or-empty + | path-absolute + | path-rootless + | path-empty; +hierarchy-part:alias:failpass = "//" authority path-absolute-or-empty + | path-absolute + | path-rootless + | path-empty; +relative-part-rfc:alias:failpass = "//" authority-rfc path-absolute-or-empty + | path-absolute + | path-noscheme + | path-empty; +relative-part:alias:failpass = "//" authority path-absolute-or-empty + | path-absolute + | path-noscheme + | path-empty; +absolute-url-rfc = scheme ":" hierarchy-part-rfc ("?" query)? ("#" fragment)?; +absolute-url = scheme ":" hierarchy-part ("?" query)? ("#" fragment)?; +relative-url-rfc = relative-part-rfc ("?" query)? ("#" fragment)?; +relative-url = relative-part ("?" query)? 
("#" fragment)?; + +// supporting four possible types URLs: +url:root = absolute-url | absolute-url-rfc | relative-url | relative-url-rfc; +` + func TestDocFormat(t *testing.T) { for _, test := range []struct{ title, in, out string }{{ title: "format", @@ -408,6 +572,14 @@ func TestDocFormat(t *testing.T) { title: "check", in: testDocCheck, out: testDocCheck, + }, { + title: "format url", + in: testDocURL, + out: testDocCheckURL, + }, { + title: "check url", + in: testDocCheckURL, + out: testDocCheckURL, }} { t.Run(test.title, func(t *testing.T) { in := bytes.NewBufferString(test.in) diff --git a/Makefile b/makefile similarity index 100% rename from Makefile rename to makefile diff --git a/readme.md b/readme.md index 14d4d52..8085247 100644 --- a/readme.md +++ b/readme.md @@ -9,6 +9,7 @@ its Abstract Syntax Tree (AST) representation. It utilizes a custom syntax defin ## Examples - **JSON**: [doc/example/json.treerack](doc/example/json.treerack) +- **URL**: [doc/example/url.treerack](doc/example/url.treerack) - **Scheme**: [doc/example/scheme.treerack](doc/example/scheme.treerack) - **Treerack (self-definition)**: [syntax.treerack](syntax.treerack) @@ -34,6 +35,12 @@ cd treerack make install ``` +Installing it to one's home directory: + +``` +prefix=~/.local make install +``` + Alternatively ("best effort" basis): ``` diff --git a/syntax.treerack b/syntax.treerack index ae0d7da..53a3137 100644 --- a/syntax.treerack +++ b/syntax.treerack @@ -50,7 +50,7 @@ sequence = item+; option:alias = terminal | symbol | group | sequence; choice = option ("|" option)+; -// flags control how the subtrees of the individual parser definitions are handled: +// flags control AST generation, whitespace handling and error propagation: alias = "alias"; ws = "ws"; nows = "nows";