Adding upstream version 1.34.4.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-05-24 07:26:29 +02:00 · 2025-05-24 07:26:29 +02:00 · 4978089aab
commit 4978089aab
parent e393c3af3f
4963 changed files with 677545 additions and 0 deletions
--- a/plugins/common/encoding/decoder.go
+++ b/plugins/common/encoding/decoder.go
@ -0,0 +1,34 @@
+package encoding
+
+import (
+	"errors"
+
+	"golang.org/x/text/encoding"
+	"golang.org/x/text/encoding/unicode"
+)
+
+// NewDecoder returns an x/text Decoder for the specified text encoding.  The
+// Decoder converts a character encoding into utf-8 bytes.  If a BOM is found
+// it will be converted into an utf-8 BOM, you can use
+// github.com/dimchansky/utfbom to strip the BOM.
+//
+// The "none" or "" encoding will pass through bytes unchecked.  Use the utf-8
+// encoding if you want invalid bytes replaced using the unicode
+// replacement character.
+//
+// Detection of utf-16 endianness using the BOM is not currently provided due
+// to the tail input plugins requirement to be able to start at the middle or
+// end of the file.
+func NewDecoder(enc string) (*Decoder, error) {
+	switch enc {
+	case "utf-8":
+		return createDecoder(unicode.UTF8.NewDecoder()), nil
+	case "utf-16le":
+		return createDecoder(unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder()), nil
+	case "utf-16be":
+		return createDecoder(unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder()), nil
+	case "none", "":
+		return createDecoder(encoding.Nop.NewDecoder()), nil
+	}
+	return nil, errors.New("unknown character encoding")
+}
--- a/plugins/common/encoding/decoder_reader.go
+++ b/plugins/common/encoding/decoder_reader.go
@ -0,0 +1,164 @@
+package encoding
+
+import (
+	"errors"
+	"io"
+
+	"golang.org/x/text/transform"
+)
+
+// Apart from resetting r.err and r.transformComplete at the start of Read(),
+// this code was copied from x/text.
+
+// createDecoder wraps the transformer t in a Decoder.
+func createDecoder(t transform.Transformer) *Decoder {
+	dec := new(Decoder)
+	dec.Transformer = t
+	return dec
+}
+
+// A Decoder converts bytes to UTF-8. It implements transform.Transformer via its embedded field.
+//
+// Transforming source bytes that are not of that encoding will not result in an
+// error per se. Each byte that cannot be transcoded will be represented in the
+// output by the UTF-8 encoding of '\uFFFD', the replacement rune.
+type Decoder struct {
+	transform.Transformer // the transformer that performs the actual conversion
+
+	// This blank field forces external creators of Decoders to use field names
+	// in struct initializers, allowing for future extensibility without having
+	// to break their code.
+	_ struct{}
+}
+
+// Bytes decodes b into UTF-8 and returns the result. If the conversion fails
+// it returns nil together with the error.
+func (d *Decoder) Bytes(b []byte) ([]byte, error) {
+	out, _, err := transform.Bytes(d, b)
+	if err != nil {
+		// Never hand back partial output alongside an error.
+		out = nil
+	}
+	return out, err
+}
+
+// String decodes s into UTF-8 and returns the result. If the conversion fails
+// it returns the empty string together with the error.
+func (d *Decoder) String(s string) (string, error) {
+	out, _, err := transform.String(d, s)
+	if err != nil {
+		// Never hand back partial output alongside an error.
+		out = ""
+	}
+	return out, err
+}
+
+// Reader returns an io.Reader that yields the bytes of r decoded by d.
+//
+// The returned Reader drives d directly, so d must not be used for any other
+// operation while that Reader is in use.
+func (d *Decoder) Reader(r io.Reader) io.Reader {
+	decoding := NewReader(r, d)
+	return decoding
+}
+
+// Reader wraps another io.Reader by transforming the bytes read through t.
+type Reader struct {
+	r   io.Reader             // underlying source of untransformed bytes
+	t   transform.Transformer // transformer applied to the bytes read from r
+	err error                 // latest error from r.Read or the transform; cleared at the start of every Read
+
+	// dst[dst0:dst1] contains bytes that have been transformed by t but
+	// not yet copied out via Read.
+	dst        []byte
+	dst0, dst1 int
+
+	// src[src0:src1] contains bytes that have been read from r but not
+	// yet transformed through t.
+	src        []byte
+	src0, src1 int
+
+	// transformComplete reports whether the transformation has finished,
+	// successfully or not; Read resets it to false on every call.
+	transformComplete bool
+}
+
+// errInconsistentByteCount is stored as the Reader's error when Transform
+// reports success (nil error) yet returns an nSrc that does not account for
+// the whole src argument.
+var errInconsistentByteCount = errors.New("transform: inconsistent byte count returned")
+
+// defaultBufSize is the size, in bytes, of the src and dst buffers that
+// NewReader allocates.
+const defaultBufSize = 4096
+
+// NewReader creates a Reader that passes everything read from r through the
+// transformer t. t is Reset before use, and the Reader gets its own source
+// and destination buffers of defaultBufSize bytes each.
+func NewReader(r io.Reader, t transform.Transformer) *Reader {
+	t.Reset()
+
+	rd := new(Reader)
+	rd.r, rd.t = r, t
+	rd.dst = make([]byte, defaultBufSize)
+	rd.src = make([]byte, defaultBufSize)
+	return rd
+}
+
+// Read implements io.Reader, filling p with transformed bytes and reading from the wrapped reader when needed.
+func (r *Reader) Read(p []byte) (int, error) {
+	// Clear previous errors so a Read can be performed even if the last call
+	// returned EOF; this reset is the one deviation from the x/text original.
+	r.err = nil
+	r.transformComplete = false
+
+	n := 0 // scratch byte count reused by copy, Transform and r.r.Read
+	for {
+		// Copy out any transformed bytes and return the final error if we are done.
+		if r.dst0 != r.dst1 {
+			n = copy(p, r.dst[r.dst0:r.dst1])
+			r.dst0 += n
+			if r.dst0 == r.dst1 && r.transformComplete { // dst drained and nothing more will follow
+				return n, r.err
+			}
+			return n, nil
+		} else if r.transformComplete {
+			return 0, r.err
+		}
+
+		// Try to transform some source bytes, or to flush the transformer if we
+		// are out of source bytes. We do this even if r.r.Read returned an error.
+		// As the io.Reader documentation says, "process the n > 0 bytes returned
+		// before considering the error".
+		if r.src0 != r.src1 || r.err != nil {
+			var err error
+			r.dst0 = 0 // dst is empty at this point, so it can be refilled from the start
+			r.dst1, n, err = r.t.Transform(r.dst, r.src[r.src0:r.src1], errors.Is(r.err, io.EOF))
+			r.src0 += n // skip the source bytes Transform reported as consumed
+
+			switch {
+			case err == nil:
+				if r.src0 != r.src1 {
+					r.err = errInconsistentByteCount
+				}
+				// The Transform call was successful; we are complete if we
+				// cannot read more bytes into src.
+				r.transformComplete = r.err != nil
+				continue
+			case errors.Is(err, transform.ErrShortDst) && (r.dst1 != 0 || n != 0):
+				// Make room in dst by copying out, and try again.
+				continue
+			case errors.Is(err, transform.ErrShortSrc) && r.src1-r.src0 != len(r.src) && r.err == nil:
+				// Fall out of the switch to read more bytes into src below, then retry.
+			default:
+				r.transformComplete = true
+				// The reader error (r.err) takes precedence over the
+				// transformer error (err) unless r.err is nil or io.EOF.
+				if r.err == nil || errors.Is(r.err, io.EOF) {
+					r.err = err
+				}
+				continue
+			}
+		}
+
+		// Move any untransformed source bytes to the start of the buffer
+		// and read more bytes.
+		if r.src0 != 0 {
+			r.src0, r.src1 = 0, copy(r.src, r.src[r.src0:r.src1])
+		}
+		n, r.err = r.r.Read(r.src[r.src1:]) // the error is kept in r.err and acted on in the next iteration
+		r.src1 += n
+	}
+}
--- a/plugins/common/encoding/decoder_test.go
+++ b/plugins/common/encoding/decoder_test.go
@ -0,0 +1,78 @@
+package encoding
+
+import (
+	"bytes"
+	"io"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+// TestDecoder runs a table of encodings through NewDecoder and checks that the
+// Reader it provides yields the expected utf-8 bytes.
+func TestDecoder(t *testing.T) {
+	cases := []struct {
+		name        string
+		encoding    string
+		input       []byte
+		expected    []byte
+		expectedErr bool
+	}{
+		{name: "no decoder utf-8", encoding: "", input: []byte("howdy"), expected: []byte("howdy")},
+		{name: "utf-8 decoder", encoding: "utf-8", input: []byte("howdy"), expected: []byte("howdy")},
+		{
+			name:     "utf-8 decoder invalid bytes replaced with replacement char",
+			encoding: "utf-8",
+			input:    []byte("\xff\xfe"),
+			expected: []byte("\uFFFD\uFFFD"),
+		},
+		{
+			name:     "utf-16le decoder no BOM",
+			encoding: "utf-16le",
+			input:    []byte("h\x00o\x00w\x00d\x00y\x00"),
+			expected: []byte("howdy"),
+		},
+		{
+			// The BOM is decoded like any other code point, so it shows up
+			// as a utf-8 BOM in the output.
+			name:     "utf-16le decoder with BOM",
+			encoding: "utf-16le",
+			input:    []byte("\xff\xfeh\x00o\x00w\x00d\x00y\x00"),
+			expected: []byte("\xef\xbb\xbfhowdy"),
+		},
+		{
+			name:     "utf-16be decoder no BOM",
+			encoding: "utf-16be",
+			input:    []byte("\x00h\x00o\x00w\x00d\x00y"),
+			expected: []byte("howdy"),
+		},
+		{
+			name:     "utf-16be decoder with BOM",
+			encoding: "utf-16be",
+			input:    []byte("\xfe\xff\x00h\x00o\x00w\x00d\x00y"),
+			expected: []byte("\xef\xbb\xbfhowdy"),
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			dec, err := NewDecoder(tc.encoding)
+			require.NoError(t, err)
+
+			src := bytes.NewReader(tc.input)
+			got, err := io.ReadAll(dec.Reader(src))
+			if tc.expectedErr {
+				require.Error(t, err)
+				return
+			}
+			require.NoError(t, err)
+			require.Equal(t, tc.expected, got)
+		})
+	}
+}