package treerack

import (
	"errors"
	"fmt"
	"io"
)

// CommitType controls how the output of a named parser is handled and represented in the resulting AST.
type CommitType int

// The flag values below are derived from iota. Because None occupies the first position in the group, iota
// is already 1 on the Alias line, so the flags start at 1<<1 and double from there.
const (

	// None indicates the default behavior: parsed segments are represented as named nodes in the AST.
	// Whitespace handling inherits the syntax's global settings.
	None CommitType = 0

	// Alias treats the parser as a pass-through. Validated segments are included in the AST node of the
	// enclosing parser rather than creating a distinct node.
	Alias CommitType = 1 << iota

	// Whitespace designates a parser (typically a character sequence) to be treated as ignored whitespace
	// throughout the input.
	Whitespace

	// NoWhitespace prevents the automatic skipping of defined whitespace characters within specific sequences.
	NoWhitespace

	// Keyword marks a sequence as a reserved keyword. This allows specific sequences to be protected or
	// restricted in certain contexts via the NoKeyword flag.
	Keyword

	// NoKeyword prevents the parser from matching sequences marked as Keywords.
	NoKeyword

	// FailPass configures the parser to propagate failure up to the enclosing parser rather than handling it
	// locally.
	FailPass

	// NoFailPass configures the parser to handle the failure locally. This overrides automatic fail-pass,
	// e.g. in case of Alias definitions.
	NoFailPass

	// Root explicitly marks the parser as the root of the syntax. By default, the last defined parser is
	// considered the root.
	Root

	// userDefined is the last, unexported flag of the group.
	userDefined
)

// formatFlags is a bit set of formatting options.
type formatFlags int

const (
	formatNone   formatFlags = 0
	formatPretty formatFlags = 1 << iota
	formatIncludeComments
)

// formatOptions bundles the formatting flags with the target output width.
type formatOptions struct {
	mode        formatFlags
	targetWidth int
}

// TraceEvent indicates the type of a trace entry.
type TraceEvent int

const (

	// Enter is the type of a trace entry set when entering the parser of a definition.
	Enter TraceEvent = iota

	// Success is the type of a trace entry set when the parser of a definition is returning with success.
	Success

	// Fail is the type of a trace entry set when the parser of a definition is returning with failure.
	Fail
)

// TraceEntry represents trace events set during parsing with the purpose of helping to identify problems with
// the input being parsed or to debug a parser.
type TraceEntry struct {

	// Level indicates the level of descent while parsing. The root parser has a level of 0.
	Level int

	// Parser is the name of the parser definition that set the trace event.
	Parser string

	// From is the offset in the input where the parser that set the trace event has started.
	From int

	// To is the offset in the input where the parser that set the trace event has left.
	To int

	// FromLine is the line number where the parser that set the trace event has started.
	FromLine int

	// FromCol is the column number where the parser that set the trace event has started.
	FromCol int

	// ToLine is the line number where the parser that set the trace event has left.
	ToLine int

	// ToCol is the column number where the parser that set the trace event has left.
	ToCol int

	// Event is the type of the trace event.
	Event TraceEvent

	// Reason is the reason why the parser that set the trace event has failed.
	Reason string
}

// ParseError reports a failure to match the input text against the defined syntax.
type ParseError struct {
	inputContent []rune

	// Input denotes the name of the input source (e.g., filename), or "" if unavailable.
	Input string

	// Offset is the index of the right-most token where the parse failed.
	Offset int

	// Line is the zero-based line number of the failure position.
	//
	// For display purposes, increment by one.
	Line int

	// Column is the zero-based column index of the failure position.
	Column int

	// Definition identifies the name of the specific parser definition where the match failed.
	Definition string

	// Trace contains the last parsing steps leading to the parse error.
	Trace []TraceEntry

	// UnexpectedInputLine has a non-negative value, if the parse failed due to unexpected input.
	UnexpectedInputLine int

	// UnexpectedInputCol has a non-negative value, if the parse failed due to unexpected input.
	UnexpectedInputCol int
}

// parser is the internal contract that parseInput uses to run a parse against a context.
type parser interface {
	nodeName() string
	nodeID() int
	commitType() CommitType
	parse(*context)
}

// builder is the internal contract that parseInput uses to build the resulting nodes from a context once
// parsing has finished.
type builder interface {
	nodeName() string
	nodeID() int
	build(*context) ([]Node, bool)
}

// ErrInvalidUnicodeCharacter indicates that the input content contains invalid UTF-8 sequences.
var ErrInvalidUnicodeCharacter = errors.New("invalid unicode character")

// InputContent returns a copy of the specified slice from the consumed parse input. If to < 0, to is the
// length of the available content.
//
// Out-of-range bounds are clamped to the available content instead of panicking or padding the result: a
// negative from is treated as 0, and a to beyond the content is treated as its length. When the resulting
// range is empty, nil is returned.
func (pe *ParseError) InputContent(from, to int) []rune {
	available := len(pe.inputContent)
	if to < 0 || to > available {
		to = available
	}

	if from < 0 {
		from = 0
	}

	if to <= from {
		return nil
	}

	// Copy exactly [from:to] so that the caller cannot alter the stored input.
	c := make([]rune, to-from)
	copy(c, pe.inputContent[from:to])
	return c
}

// Error returns the formatted failure message.
//
// When both UnexpectedInputLine and UnexpectedInputCol are non-negative, the message reports the unexpected
// input position. Otherwise it reports the failing definition and the Line/Column position. The stored
// positions are incremented by one for display.
func (pe *ParseError) Error() string {
	if pe.UnexpectedInputLine >= 0 && pe.UnexpectedInputCol >= 0 {
		return fmt.Sprintf(
			"%s:%d:%d:parse failed, unexpected input at %d:%d",
			pe.Input,
			pe.UnexpectedInputLine+1,
			pe.UnexpectedInputCol+1,
			pe.UnexpectedInputLine+1,
			pe.UnexpectedInputCol+1,
		)
	}

	return fmt.Sprintf(
		"%s:%d:%d:parse failed, parsing: %s, at %d:%d",
		pe.Input,
		pe.Line+1,
		pe.Column+1,
		pe.Definition,
		pe.Line+1,
		pe.Column+1,
	)
}

// parseInput reads all of r, parses the content with p and, when the parse succeeds, builds the result with
// b. The kw and maxTraceLength arguments are passed on to the parse context.
//
// It returns the read error of the reader, the read error recorded in the context, or the error returned by
// finalizeParse, whichever occurs first. On success, it returns the first node returned by the builder.
func parseInput(r io.Reader, p parser, b builder, kw []parser, maxTraceLength int) (Node, error) {
	bb, err := io.ReadAll(r)
	if err != nil {
		return Node{}, err
	}

	c := newContext([]rune(string(bb)), kw, maxTraceLength)
	p.parse(c)
	if c.readErr != nil {
		return Node{}, c.readErr
	}

	if err := c.finalizeParse(p); err != nil {
		// The input is an anonymous reader here, so no input name is available.
		if perr, ok := err.(*ParseError); ok {
			perr.Input = ""
		}

		return Node{}, err
	}

	// Rewind the context before the build phase.
	c.offset = 0
	c.results.resetPending()

	// NOTE(review): the boolean result of build is discarded and n[0] is indexed unconditionally;
	// presumably build always yields at least one node after a successful finalizeParse — confirm.
	n, _ := b.build(c)
	return n[0], nil
}