1
0
treerack/syntaxhead.go

125 lines
3.0 KiB
Go
Raw Normal View History

2018-01-05 19:06:10 +01:00
package treerack
import (
"bufio"
"errors"
"fmt"
"io"
)
2026-01-18 22:52:27 +01:00
// CommitType controls how the output of a named parser is handled and represented in the resulting AST.
//
// Every constant other than None is a distinct power of two (they are declared with 1 << iota).
// NOTE(review): this suggests the values are meant to be combined with bitwise OR — confirm against the
// code that consumes them.
2018-01-05 19:06:10 +01:00
type CommitType int
const (
2026-01-18 22:52:27 +01:00
// None indicates the default behavior: parsed segments are represented as named nodes in the AST.
// Whitespace handling inherits the syntax's global settings.
None CommitType = 0
// Alias treats the parser as a pass-through. Validated segments are included in the AST node of the
// enclosing parser rather than creating a distinct node.
//
// Alias is the second entry of this const block, so iota is 1 here and the value is 2, not 1. Each
// constant that follows repeats the expression and doubles the previous value (4, 8, 16, ...).
2018-01-05 19:06:10 +01:00
Alias CommitType = 1 << iota
2026-01-18 22:52:27 +01:00
// Whitespace designates a parser (typically a character sequence) to be treated as ignored whitespace
// throughout the input.
2018-01-05 19:06:10 +01:00
Whitespace
2026-01-18 22:52:27 +01:00
// NoWhitespace prevents the automatic skipping of defined whitespace characters within specific sequences.
2018-01-05 19:06:10 +01:00
NoWhitespace
2026-01-18 22:52:27 +01:00
// Keyword marks a sequence as a reserved keyword. This allows specific sequences to be protected or
// restricted in certain contexts via the NoKeyword flag.
2019-02-02 18:07:10 +01:00
Keyword
2026-01-18 22:52:27 +01:00
// NoKeyword prevents the parser from matching sequences marked as Keywords.
2019-02-02 18:07:10 +01:00
NoKeyword
2026-01-18 22:52:27 +01:00
// FailPass configures the parser to propagate failure up to the enclosing parser rather than handling it
// locally.
2018-01-05 19:06:10 +01:00
FailPass
2026-01-18 22:52:27 +01:00
// Root explicitly marks the parser as the root of the syntax. By default, the last defined parser is
// considered the root.
2018-01-05 19:06:10 +01:00
Root
// userDefined is the unexported bit that follows Root (1 << 8).
// NOTE(review): presumably the first bit available beyond the predefined flags — confirm against its
// uses elsewhere in the package.
userDefined
)
// formatFlags holds formatting option bits; every non-zero constant below occupies its own bit.
type formatFlags int

// The shifts are written out explicitly. formatNone takes the first slot of the block, so the first
// real flag is 1 << 1 (2) rather than 1 << 0.
const (
	formatNone            formatFlags = 0
	formatPretty          formatFlags = 1 << 1
	formatIncludeComments formatFlags = 1 << 2
)
2026-01-18 22:52:27 +01:00
// ParseError reports a failure to match the input text against the defined syntax.
//
// Line and Column are stored zero-based; the Error method adds one to each when it formats the message
// as "input:line:column:...".
2018-01-05 19:06:10 +01:00
type ParseError struct {
2026-01-18 22:52:27 +01:00
// Input denotes the name of the input source (e.g., filename), or "<input>" if unavailable.
// parseInput sets it to "<input>" on the errors it returns.
2018-01-05 19:06:10 +01:00
Input string
2026-01-18 22:52:27 +01:00
// Offset is the index of the right-most token where the parse failed.
// NOTE(review): the unit (bytes or characters) is not stated here — confirm where it is set.
2018-01-05 19:06:10 +01:00
Offset int
2026-01-18 22:52:27 +01:00
// Line is the zero-based line number of the failure position.
2018-01-05 19:06:10 +01:00
//
2026-01-18 22:52:27 +01:00
// For display purposes, increment by one.
2018-01-05 19:06:10 +01:00
Line int
2026-01-18 22:52:27 +01:00
// Column is the zero-based column index of the failure position. As with Line, increment by one for
// display purposes.
2018-01-05 19:06:10 +01:00
Column int
2026-01-18 22:52:27 +01:00
// Definition identifies the name of the specific parser definition where the match failed.
2018-01-05 19:06:10 +01:00
Definition string
}
// parser is the package-internal interface of a value that can be run against a parse context.
// parseInput drives one by calling parse and then passing the same parser to the context's
// finalizeParse.
type parser interface {
// nodeName returns the parser's name.
// NOTE(review): presumably the name of the syntax definition it belongs to — confirm.
nodeName() string
// nodeID returns the parser's integer identifier.
nodeID() int
// commitType returns the CommitType associated with the parser.
commitType() CommitType
// parse runs the parser against the given context. It returns nothing, so any outcome has to be
// recorded on the context itself.
parse(*context)
}
// builder is the package-internal interface of a value that produces nodes from a parse context.
// parseInput calls build after a successful parse and returns the first node of the result.
type builder interface {
// nodeName returns the builder's name.
// NOTE(review): presumably matches the name of the corresponding parser — confirm.
nodeName() string
// nodeID returns the builder's integer identifier.
nodeID() int
// build produces nodes from the context and reports a boolean alongside them.
// NOTE(review): the meaning of the boolean is not visible here (parseInput ignores it) — confirm
// against the implementations.
build(*context) ([]*Node, bool)
}
2026-01-18 22:52:27 +01:00
// ErrInvalidUnicodeCharacter indicates that the input content contains invalid UTF-8 sequences.
// It is a sentinel value created with errors.New, so callers can test for it with errors.Is.
2018-01-05 19:06:10 +01:00
var ErrInvalidUnicodeCharacter = errors.New("invalid unicode character")
2026-01-18 22:52:27 +01:00
// Error returns the formatted failure message.
2018-01-05 19:06:10 +01:00
func (pe *ParseError) Error() string {
	// Line and Column are kept zero-based on the struct; the message shows them one-based.
	line, column := pe.Line+1, pe.Column+1

	// Layout: <input>:<line>:<column>:parse failed, parsing: <definition>
	return fmt.Sprintf("%s:%d:%d:parse failed, parsing: %s", pe.Input, line, column, pe.Definition)
}
2019-02-02 18:07:10 +01:00
func parseInput(r io.Reader, p parser, b builder, kw []parser) (*Node, error) {
c := newContext(bufio.NewReader(r), kw)
2018-01-05 19:06:10 +01:00
p.parse(c)
if c.readErr != nil {
return nil, c.readErr
}
if err := c.finalizeParse(p); err != nil {
if perr, ok := err.(*ParseError); ok {
perr.Input = "<input>"
}
return nil, err
}
c.offset = 0
c.results.resetPending()
n, _ := b.build(c)
return n[0], nil
}