1
0
treerack/syntaxhead.go
2026-06-06 21:19:04 +02:00

223 lines
5.8 KiB
Go

package treerack
import (
"bufio"
"errors"
"fmt"
"io"
)
// CommitType controls how the output of a named parser is handled and represented in the resulting AST.
type CommitType int
const (
// None indicates the default behavior: parsed segments are represented as named nodes in the AST.
// Whitespace handling inherits the syntax's global settings.
None CommitType = 0
// Alias treats the parser as a pass-through. Validated segments are included in the AST node of the
// enclosing parser rather than creating a distinct node.
Alias CommitType = 1 << iota
// Whitespace designates a parser (typically a character sequence) to be treated as ignored whitespace
// throughout the input.
Whitespace
// NoWhitespace prevents the automatic skipping of defined whitespace characters within specific sequences.
NoWhitespace
// Keyword marks a sequence as a reserved keyword. This allows specific sequences to be protected or
// restricted in certain contexts via the NoKeyword flag.
Keyword
// NoKeyword prevents the parser from matching sequences marked as Keywords.
NoKeyword
// FailPass configures the parser to propagate failure up to the enclosing parser rather than handling it
// locally.
FailPass
// NoFailPass configures the parser to handle the failure locally. This overrides automatic fail-pass,
// e.g. in case of Alias definitions.
NoFailPass
// Root explicitly marks the parser as the root of the syntax. By default, the last defined parser is
// considered the root.
Root
userDefined
)
type formatFlags int
const (
formatNone formatFlags = 0
formatPretty formatFlags = 1 << iota
formatIncludeComments
)
type formatOptions struct {
mode formatFlags
targetWidth int
}
// TraceEvent indicates the type of a trace entry.
type TraceEvent int
const (
// Enter is the type of a trace entry set when entering the parser of a definition.
Enter TraceEvent = iota
// Success is the type of a trace entry set when the parser of a definition is returning with success.
Success
// Fail is the type of a trace entry set when the parser of a definition is returning with failure.
Fail
)
// TraceEntry represents trace events set during parsing with the purpose of helping to identify problems with
// the input being parsed or to debug a parser.
type TraceEntry struct {
// Level indicates the level of descend while parsing. The root parser has a level of 0.
Level int
// Parser is the name of the parser definition that set the trace event.
Parser string
// From is the offset in the input where the parser, that set the trace event, has started.
From int
// To is the offset in the input where the parser, that set the trace event, has left.
To int
// FromLine is the line number where the parser, that set the trace event, has started.
FromLine int
// FromCol is the column number where the parser, that set the trace event, has started.
FromCol int
// ToLine is the line number where the parser, that set the trace event, has left.
ToLine int
// ToCol is the column number where the parser, that set the trace event, has left.
ToCol int
// Event is the type of the trace event.
Event TraceEvent
// Reason is the reason why the parser, that set the trace event, has failed.
Reason string
}
// ParseError reports a failure to match the input text against the defined syntax.
type ParseError struct {
inputContent []rune
// Input denotes the name of the input source (e.g., filename), or "<input>" if unavailable.
Input string
// Offset is the index of the right-most token where the parse failed.
Offset int
// Line is the zero-based line number of the failure position.
//
// For display purposes, increment by one.
Line int
// Column is the zero-based column index of the failure position.
Column int
// Definition identifies the name of the specific parser definition where the match failed.
Definition string
// Trace contains the last parsing steps leading to the parse error.
Trace []TraceEntry
// UnexpectedInputLine has a non-negative value, if the parse failed due to unexpected input.
UnexpectedInputLine int
// UnexpectedInputLine has a non-negative value, if the parse failed due to unexpected input.
UnexpectedInputCol int
}
type parser interface {
nodeName() string
nodeID() int
commitType() CommitType
parse(*context)
}
type builder interface {
nodeName() string
nodeID() int
build(*context) ([]Node, bool)
}
// ErrInvalidUnicodeCharacter indicates that the input content contains invalid UTF-8 sequences.
var ErrInvalidUnicodeCharacter = errors.New("invalid unicode character")
// InputContent returns the specified slice from the consumed parse input. If to < 0, to is length of the
// available content.
func (pe *ParseError) InputContent(from, to int) []rune {
if to < 0 {
to = len(pe.inputContent)
}
if to <= from {
return nil
}
c := make([]rune, to-from)
copy(c, pe.inputContent[from:])
return c
}
// Error returns the formatted failure message.
func (pe *ParseError) Error() string {
if pe.UnexpectedInputLine >= 0 && pe.UnexpectedInputCol >= 0 {
return fmt.Sprintf(
"%s:%d:%d:parse failed, unexpected input at %d:%d",
pe.Input,
pe.UnexpectedInputLine+1,
pe.UnexpectedInputCol+1,
pe.UnexpectedInputLine+1,
pe.UnexpectedInputCol+1,
)
}
return fmt.Sprintf(
"%s:%d:%d:parse failed, parsing: %s, at %d:%d",
pe.Input,
pe.Line+1,
pe.Column+1,
pe.Definition,
pe.Line+1,
pe.Column+1,
)
}
func parseInput(r io.Reader, p parser, b builder, kw []parser) (Node, error) {
c := newContext(bufio.NewReader(r), kw)
p.parse(c)
if c.readErr != nil {
return Node{}, c.readErr
}
if err := c.finalizeParse(p); err != nil {
if perr, ok := err.(*ParseError); ok {
perr.Input = "<input>"
}
return Node{}, err
}
c.offset = 0
c.results.resetPending()
n, _ := b.build(c)
return n[0], nil
}