1
0

ignore broken unicode chars

This commit is contained in:
Arpad Ryszka 2025-11-01 05:06:41 +01:00
parent cb3240747d
commit 71640ab28f
2 changed files with 24 additions and 1 deletions

9
lib.go
View File

@ -1,7 +1,10 @@
// Package textedit provides a non-regexp, streaming editor to apply basic text manipulation. // Package textedit provides a non-regexp, streaming editor to apply basic text manipulation.
package textedit package textedit
import "io" import (
"io"
"unicode"
)
// Editor instances can be used to edit a text stream. It is expected from the implementations to be reusable // Editor instances can be used to edit a text stream. It is expected from the implementations to be reusable
// with fresh state after the Flush was called on the enclosing writer. // with fresh state after the Flush was called on the enclosing writer.
@ -109,6 +112,10 @@ func (w *Writer) write(r []rune) error {
} }
for _, ri := range r { for _, ri := range r {
if ri == unicode.ReplacementChar {
continue
}
rr, s := w.editor.Edit(ri, w.state) rr, s := w.editor.Edit(ri, w.state)
if _, err := w.out.Write([]byte(string(rr))); err != nil { if _, err := w.out.Write([]byte(string(rr))); err != nil {
w.err = err w.err = err

View File

@ -75,6 +75,22 @@ func TestWriteRune(t *testing.T) {
} }
} }
// TestBrokenUnicode checks that a byte which does not form a valid UTF-8
// sequence is left out of the output, while the text around it is written
// unchanged.
func TestBrokenUnicode(t *testing.T) {
	// 0xc2 is the lead byte of a two-byte UTF-8 sequence, but the following
	// 'b' is not a continuation byte, so the 0xc2 on its own is invalid.
	const (
		input = "foo \xc2bar baz"
		want  = "foo bar baz"
	)

	var b bytes.Buffer
	w := textedit.New(&b)
	if _, err := w.Write([]byte(input)); err != nil {
		t.Fatal(err)
	}

	if err := w.Flush(); err != nil {
		t.Fatal(err)
	}

	// Report with %q so that stray invalid bytes or replacement characters in
	// the output are visible as escapes instead of being rendered raw.
	if got := b.String(); got != want {
		t.Fatalf("got %q, want %q", got, want)
	}
}
func TestFailingWriter(t *testing.T) { func TestFailingWriter(t *testing.T) {
t.Run("after write", func(t *testing.T) { t.Run("after write", func(t *testing.T) {
var b bytes.Buffer var b bytes.Buffer