2018-01-05 19:06:10 +01:00
|
|
|
package treerack
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"bufio"
|
|
|
|
|
"errors"
|
|
|
|
|
"fmt"
|
|
|
|
|
"io"
|
|
|
|
|
)
|
|
|
|
|
|
2026-01-18 22:52:27 +01:00
|
|
|
// CommitType controls how the output of a named parser is handled and how it is represented in the
// resulting AST. Its values are bit flags, so several of them can be combined in a single CommitType.
type CommitType int
|
|
|
|
|
|
|
|
|
|
const (
|
2026-01-18 22:52:27 +01:00
|
|
|
|
|
|
|
|
// None indicates the default behavior: parsed segments are represented as named nodes in the AST.
|
|
|
|
|
// Whitespace handling inherits the syntax's global settings.
|
|
|
|
|
None CommitType = 0
|
|
|
|
|
|
|
|
|
|
// Alias treats the parser as a pass-through. Validated segments are included in the AST node of the
|
|
|
|
|
// enclosing parser rather than creating a distinct node.
|
2018-01-05 19:06:10 +01:00
|
|
|
Alias CommitType = 1 << iota
|
2026-01-18 22:52:27 +01:00
|
|
|
|
|
|
|
|
// Whitespace designates a parser (typically a character sequence) to be treated as ignored whitespace
|
|
|
|
|
// throughout the input.
|
2018-01-05 19:06:10 +01:00
|
|
|
Whitespace
|
2026-01-18 22:52:27 +01:00
|
|
|
|
|
|
|
|
// NoWhitespace prevents the automatic skipping of defined whitespace characters within specific sequences.
|
2018-01-05 19:06:10 +01:00
|
|
|
NoWhitespace
|
2026-01-18 22:52:27 +01:00
|
|
|
|
|
|
|
|
// Keyword marks a sequence as a reserved keyword. This allows specific sequences to be protected or
|
|
|
|
|
// restricted in certain contexts via the NoKeyword flag.
|
2019-02-02 18:07:10 +01:00
|
|
|
Keyword
|
2026-01-18 22:52:27 +01:00
|
|
|
|
|
|
|
|
// NoKeyword prevents the parser from matching sequences marked as Keywords.
|
2019-02-02 18:07:10 +01:00
|
|
|
NoKeyword
|
2026-01-18 22:52:27 +01:00
|
|
|
|
|
|
|
|
// FailPass configures the parser to propagate failure up to the enclosing parser rather than handling it
|
|
|
|
|
// locally.
|
2018-01-05 19:06:10 +01:00
|
|
|
FailPass
|
2026-01-18 22:52:27 +01:00
|
|
|
|
2026-06-06 05:23:32 +02:00
|
|
|
// NoFailPass configures the parser to handle the failure locally. This overrides automatic fail-pass,
|
|
|
|
|
// e.g. in case of Alias definitions.
|
|
|
|
|
NoFailPass
|
|
|
|
|
|
2026-01-18 22:52:27 +01:00
|
|
|
// Root explicitly marks the parser as the root of the syntax. By default, the last defined parser is
|
|
|
|
|
// considered the root.
|
2018-01-05 19:06:10 +01:00
|
|
|
Root
|
|
|
|
|
|
|
|
|
|
userDefined
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// formatFlags is the integer type of the formatting flags (formatNone, formatPretty,
// formatIncludeComments).
type formatFlags int
|
|
|
|
|
|
|
|
|
|
const (
|
|
|
|
|
formatNone formatFlags = 0
|
|
|
|
|
formatPretty formatFlags = 1 << iota
|
|
|
|
|
formatIncludeComments
|
|
|
|
|
)
|
|
|
|
|
|
2026-05-30 20:14:24 +02:00
|
|
|
type formatOptions struct {
|
|
|
|
|
mode formatFlags
|
|
|
|
|
targetWidth int
|
|
|
|
|
}
|
|
|
|
|
|
2026-06-06 21:19:04 +02:00
|
|
|
// TraceEvent identifies the kind of a trace entry.
type TraceEvent int
|
|
|
|
|
|
|
|
|
|
const (
|
|
|
|
|
|
|
|
|
|
// Enter is the type of a trace entry set when entering the parser of a definition.
|
|
|
|
|
Enter TraceEvent = iota
|
|
|
|
|
|
|
|
|
|
// Success is the type of a trace entry set when the parser of a definition is returning with success.
|
|
|
|
|
Success
|
|
|
|
|
|
|
|
|
|
// Fail is the type of a trace entry set when the parser of a definition is returning with failure.
|
|
|
|
|
Fail
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// TraceEntry represents trace events set during parsing with the purpose of helping to identify problems with
|
|
|
|
|
// the input being parsed or to debug a parser.
|
|
|
|
|
type TraceEntry struct {
|
|
|
|
|
|
|
|
|
|
// Level indicates the level of descend while parsing. The root parser has a level of 0.
|
|
|
|
|
Level int
|
|
|
|
|
|
|
|
|
|
// Parser is the name of the parser definition that set the trace event.
|
|
|
|
|
Parser string
|
|
|
|
|
|
|
|
|
|
// From is the offset in the input where the parser, that set the trace event, has started.
|
|
|
|
|
From int
|
|
|
|
|
|
|
|
|
|
// To is the offset in the input where the parser, that set the trace event, has left.
|
|
|
|
|
To int
|
|
|
|
|
|
|
|
|
|
// FromLine is the line number where the parser, that set the trace event, has started.
|
|
|
|
|
FromLine int
|
|
|
|
|
|
|
|
|
|
// FromCol is the column number where the parser, that set the trace event, has started.
|
|
|
|
|
FromCol int
|
|
|
|
|
|
|
|
|
|
// ToLine is the line number where the parser, that set the trace event, has left.
|
|
|
|
|
ToLine int
|
|
|
|
|
|
|
|
|
|
// ToCol is the column number where the parser, that set the trace event, has left.
|
|
|
|
|
ToCol int
|
|
|
|
|
|
|
|
|
|
// Event is the type of the trace event.
|
|
|
|
|
Event TraceEvent
|
|
|
|
|
|
|
|
|
|
// Reason is the reason why the parser, that set the trace event, has failed.
|
|
|
|
|
Reason string
|
|
|
|
|
}
|
|
|
|
|
|
2026-01-18 22:52:27 +01:00
|
|
|
// ParseError reports a failure to match the input text against the defined syntax.
|
2018-01-05 19:06:10 +01:00
|
|
|
type ParseError struct {
|
2026-06-06 21:19:04 +02:00
|
|
|
inputContent []rune
|
2018-01-05 19:06:10 +01:00
|
|
|
|
2026-01-18 22:52:27 +01:00
|
|
|
// Input denotes the name of the input source (e.g., filename), or "<input>" if unavailable.
|
2018-01-05 19:06:10 +01:00
|
|
|
Input string
|
|
|
|
|
|
2026-01-18 22:52:27 +01:00
|
|
|
// Offset is the index of the right-most token where the parse failed.
|
2018-01-05 19:06:10 +01:00
|
|
|
Offset int
|
|
|
|
|
|
2026-01-18 22:52:27 +01:00
|
|
|
// Line is the zero-based line number of the failure position.
|
2018-01-05 19:06:10 +01:00
|
|
|
//
|
2026-01-18 22:52:27 +01:00
|
|
|
// For display purposes, increment by one.
|
2018-01-05 19:06:10 +01:00
|
|
|
Line int
|
|
|
|
|
|
2026-01-18 22:52:27 +01:00
|
|
|
// Column is the zero-based column index of the failure position.
|
2018-01-05 19:06:10 +01:00
|
|
|
Column int
|
|
|
|
|
|
2026-01-18 22:52:27 +01:00
|
|
|
// Definition identifies the name of the specific parser definition where the match failed.
|
2018-01-05 19:06:10 +01:00
|
|
|
Definition string
|
2026-06-06 21:19:04 +02:00
|
|
|
|
|
|
|
|
// Trace contains the last parsing steps leading to the parse error.
|
|
|
|
|
Trace []TraceEntry
|
|
|
|
|
|
|
|
|
|
// UnexpectedInputLine has a non-negative value, if the parse failed due to unexpected input.
|
|
|
|
|
UnexpectedInputLine int
|
|
|
|
|
|
|
|
|
|
// UnexpectedInputLine has a non-negative value, if the parse failed due to unexpected input.
|
|
|
|
|
UnexpectedInputCol int
|
2018-01-05 19:06:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type parser interface {
|
|
|
|
|
nodeName() string
|
|
|
|
|
nodeID() int
|
|
|
|
|
commitType() CommitType
|
|
|
|
|
parse(*context)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type builder interface {
|
|
|
|
|
nodeName() string
|
|
|
|
|
nodeID() int
|
2026-06-01 22:26:27 +02:00
|
|
|
build(*context) ([]Node, bool)
|
2018-01-05 19:06:10 +01:00
|
|
|
}
|
|
|
|
|
|
2026-01-18 22:52:27 +01:00
|
|
|
// ErrInvalidUnicodeCharacter indicates that the input content contains invalid UTF-8 sequences.
var ErrInvalidUnicodeCharacter = errors.New("invalid unicode character")
|
|
|
|
|
|
2026-06-06 21:19:04 +02:00
|
|
|
// InputContent returns the specified slice from the consumed parse input. If to < 0, to is length of the
|
|
|
|
|
// available content.
|
|
|
|
|
func (pe *ParseError) InputContent(from, to int) []rune {
|
|
|
|
|
if to < 0 {
|
|
|
|
|
to = len(pe.inputContent)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if to <= from {
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
c := make([]rune, to-from)
|
|
|
|
|
copy(c, pe.inputContent[from:])
|
|
|
|
|
return c
|
|
|
|
|
}
|
|
|
|
|
|
2026-01-18 22:52:27 +01:00
|
|
|
// Error returns the formatted failure message.
|
2018-01-05 19:06:10 +01:00
|
|
|
func (pe *ParseError) Error() string {
|
2026-06-06 21:19:04 +02:00
|
|
|
if pe.UnexpectedInputLine >= 0 && pe.UnexpectedInputCol >= 0 {
|
|
|
|
|
return fmt.Sprintf(
|
|
|
|
|
"%s:%d:%d:parse failed, unexpected input at %d:%d",
|
|
|
|
|
pe.Input,
|
|
|
|
|
pe.UnexpectedInputLine+1,
|
|
|
|
|
pe.UnexpectedInputCol+1,
|
|
|
|
|
pe.UnexpectedInputLine+1,
|
|
|
|
|
pe.UnexpectedInputCol+1,
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
2018-01-05 19:06:10 +01:00
|
|
|
return fmt.Sprintf(
|
2026-06-06 21:19:04 +02:00
|
|
|
"%s:%d:%d:parse failed, parsing: %s, at %d:%d",
|
2018-01-05 19:06:10 +01:00
|
|
|
pe.Input,
|
|
|
|
|
pe.Line+1,
|
|
|
|
|
pe.Column+1,
|
|
|
|
|
pe.Definition,
|
2026-06-06 21:19:04 +02:00
|
|
|
pe.Line+1,
|
|
|
|
|
pe.Column+1,
|
2018-01-05 19:06:10 +01:00
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
2026-06-06 21:48:50 +02:00
|
|
|
func parseInput(r io.Reader, p parser, b builder, kw []parser, maxTraceLength int) (Node, error) {
|
|
|
|
|
c := newContext(bufio.NewReader(r), kw, maxTraceLength)
|
2018-01-05 19:06:10 +01:00
|
|
|
p.parse(c)
|
|
|
|
|
if c.readErr != nil {
|
2026-06-01 22:26:27 +02:00
|
|
|
return Node{}, c.readErr
|
2018-01-05 19:06:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if err := c.finalizeParse(p); err != nil {
|
|
|
|
|
if perr, ok := err.(*ParseError); ok {
|
|
|
|
|
perr.Input = "<input>"
|
|
|
|
|
}
|
|
|
|
|
|
2026-06-01 22:26:27 +02:00
|
|
|
return Node{}, err
|
2018-01-05 19:06:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
c.offset = 0
|
|
|
|
|
c.results.resetPending()
|
|
|
|
|
|
|
|
|
|
n, _ := b.build(c)
|
|
|
|
|
return n[0], nil
|
|
|
|
|
}
|