Adding upstream version 1.34.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
e393c3af3f
commit
4978089aab
4963 changed files with 677545 additions and 0 deletions
34
plugins/common/encoding/decoder.go
Normal file
34
plugins/common/encoding/decoder.go
Normal file
|
@ -0,0 +1,34 @@
|
|||
package encoding
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
)
|
||||
|
||||
// NewDecoder returns an x/text Decoder for the specified text encoding. The
|
||||
// Decoder converts a character encoding into utf-8 bytes. If a BOM is found
|
||||
// it will be converted into an utf-8 BOM, you can use
|
||||
// github.com/dimchansky/utfbom to strip the BOM.
|
||||
//
|
||||
// The "none" or "" encoding will pass through bytes unchecked. Use the utf-8
|
||||
// encoding if you want invalid bytes replaced using the unicode
|
||||
// replacement character.
|
||||
//
|
||||
// Detection of utf-16 endianness using the BOM is not currently provided due
|
||||
// to the tail input plugins requirement to be able to start at the middle or
|
||||
// end of the file.
|
||||
func NewDecoder(enc string) (*Decoder, error) {
|
||||
switch enc {
|
||||
case "utf-8":
|
||||
return createDecoder(unicode.UTF8.NewDecoder()), nil
|
||||
case "utf-16le":
|
||||
return createDecoder(unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder()), nil
|
||||
case "utf-16be":
|
||||
return createDecoder(unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder()), nil
|
||||
case "none", "":
|
||||
return createDecoder(encoding.Nop.NewDecoder()), nil
|
||||
}
|
||||
return nil, errors.New("unknown character encoding")
|
||||
}
|
164
plugins/common/encoding/decoder_reader.go
Normal file
164
plugins/common/encoding/decoder_reader.go
Normal file
|
@ -0,0 +1,164 @@
|
|||
package encoding
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// Other than resetting r.err and r.transformComplete at the start of Read(),
// this code was copied from x/text.
|
||||
|
||||
func createDecoder(t transform.Transformer) *Decoder {
|
||||
return &Decoder{Transformer: t}
|
||||
}
|
||||
|
||||
// A Decoder converts bytes to UTF-8. It implements transform.Transformer.
|
||||
//
|
||||
// Transforming source bytes that are not of that encoding will not result in an
|
||||
// error per se. Each byte that cannot be transcoded will be represented in the
|
||||
// output by the UTF-8 encoding of '\uFFFD', the replacement rune.
|
||||
type Decoder struct {
|
||||
transform.Transformer
|
||||
|
||||
// This forces external creators of Decoders to use names in struct
|
||||
// initializers, allowing for future extensibility without having to break
|
||||
// code.
|
||||
_ struct{}
|
||||
}
|
||||
|
||||
// Bytes converts the given encoded bytes to UTF-8. It returns the converted
|
||||
// bytes or nil, err if any error occurred.
|
||||
func (d *Decoder) Bytes(b []byte) ([]byte, error) {
|
||||
b, _, err := transform.Bytes(d, b)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// String converts the given encoded string to UTF-8. It returns the converted
|
||||
// string or "", err if any error occurred.
|
||||
func (d *Decoder) String(s string) (string, error) {
|
||||
s, _, err := transform.String(d, s)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Reader wraps another Reader to decode its bytes.
|
||||
//
|
||||
// The Decoder may not be used for any other operation as long as the returned
|
||||
// Reader is in use.
|
||||
func (d *Decoder) Reader(r io.Reader) io.Reader {
|
||||
return NewReader(r, d)
|
||||
}
|
||||
|
||||
// Reader wraps another io.Reader by transforming the bytes read.
|
||||
type Reader struct {
|
||||
r io.Reader
|
||||
t transform.Transformer
|
||||
err error
|
||||
|
||||
// dst[dst0:dst1] contains bytes that have been transformed by t but
|
||||
// not yet copied out via Read.
|
||||
dst []byte
|
||||
dst0, dst1 int
|
||||
|
||||
// src[src0:src1] contains bytes that have been read from r but not
|
||||
// yet transformed through t.
|
||||
src []byte
|
||||
src0, src1 int
|
||||
|
||||
// transformComplete is whether the transformation is complete,
|
||||
// regardless of whether it was successful.
|
||||
transformComplete bool
|
||||
}
|
||||
|
||||
// errInconsistentByteCount signals that Transform reported success (a nil
// error) while returning an nSrc that does not match the src argument.
var errInconsistentByteCount = errors.New("transform: inconsistent byte count returned")

// defaultBufSize is the size in bytes of each of a Reader's internal buffers.
const defaultBufSize = 4096
|
||||
|
||||
// NewReader returns a new Reader that wraps r by transforming the bytes read
|
||||
// via t. It calls Reset on t.
|
||||
func NewReader(r io.Reader, t transform.Transformer) *Reader {
|
||||
t.Reset()
|
||||
return &Reader{
|
||||
r: r,
|
||||
t: t,
|
||||
dst: make([]byte, defaultBufSize),
|
||||
src: make([]byte, defaultBufSize),
|
||||
}
|
||||
}
|
||||
|
||||
// Read implements the io.Reader interface.
|
||||
func (r *Reader) Read(p []byte) (int, error) {
|
||||
// Clear previous errors so a Read can be performed even if the last call
|
||||
// returned EOF.
|
||||
r.err = nil
|
||||
r.transformComplete = false
|
||||
|
||||
n := 0
|
||||
for {
|
||||
// Copy out any transformed bytes and return the final error if we are done.
|
||||
if r.dst0 != r.dst1 {
|
||||
n = copy(p, r.dst[r.dst0:r.dst1])
|
||||
r.dst0 += n
|
||||
if r.dst0 == r.dst1 && r.transformComplete {
|
||||
return n, r.err
|
||||
}
|
||||
return n, nil
|
||||
} else if r.transformComplete {
|
||||
return 0, r.err
|
||||
}
|
||||
|
||||
// Try to transform some source bytes, or to flush the transformer if we
|
||||
// are out of source bytes. We do this even if r.r.Read returned an error.
|
||||
// As the io.Reader documentation says, "process the n > 0 bytes returned
|
||||
// before considering the error".
|
||||
if r.src0 != r.src1 || r.err != nil {
|
||||
var err error
|
||||
r.dst0 = 0
|
||||
r.dst1, n, err = r.t.Transform(r.dst, r.src[r.src0:r.src1], errors.Is(r.err, io.EOF))
|
||||
r.src0 += n
|
||||
|
||||
switch {
|
||||
case err == nil:
|
||||
if r.src0 != r.src1 {
|
||||
r.err = errInconsistentByteCount
|
||||
}
|
||||
// The Transform call was successful; we are complete if we
|
||||
// cannot read more bytes into src.
|
||||
r.transformComplete = r.err != nil
|
||||
continue
|
||||
case errors.Is(err, transform.ErrShortDst) && (r.dst1 != 0 || n != 0):
|
||||
// Make room in dst by copying out, and try again.
|
||||
continue
|
||||
case errors.Is(err, transform.ErrShortSrc) && r.src1-r.src0 != len(r.src) && r.err == nil:
|
||||
// Read more bytes into src via the code below, and try again.
|
||||
default:
|
||||
r.transformComplete = true
|
||||
// The reader error (r.err) takes precedence over the
|
||||
// transformer error (err) unless r.err is nil or io.EOF.
|
||||
if r.err == nil || errors.Is(r.err, io.EOF) {
|
||||
r.err = err
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Move any untransformed source bytes to the start of the buffer
|
||||
// and read more bytes.
|
||||
if r.src0 != 0 {
|
||||
r.src0, r.src1 = 0, copy(r.src, r.src[r.src0:r.src1])
|
||||
}
|
||||
n, r.err = r.r.Read(r.src[r.src1:])
|
||||
r.src1 += n
|
||||
}
|
||||
}
|
78
plugins/common/encoding/decoder_test.go
Normal file
78
plugins/common/encoding/decoder_test.go
Normal file
|
@ -0,0 +1,78 @@
|
|||
package encoding
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestDecoder(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
encoding string
|
||||
input []byte
|
||||
expected []byte
|
||||
expectedErr bool
|
||||
}{
|
||||
{
|
||||
name: "no decoder utf-8",
|
||||
encoding: "",
|
||||
input: []byte("howdy"),
|
||||
expected: []byte("howdy"),
|
||||
},
|
||||
{
|
||||
name: "utf-8 decoder",
|
||||
encoding: "utf-8",
|
||||
input: []byte("howdy"),
|
||||
expected: []byte("howdy"),
|
||||
},
|
||||
{
|
||||
name: "utf-8 decoder invalid bytes replaced with replacement char",
|
||||
encoding: "utf-8",
|
||||
input: []byte("\xff\xfe"),
|
||||
expected: []byte("\uFFFD\uFFFD"),
|
||||
},
|
||||
{
|
||||
name: "utf-16le decoder no BOM",
|
||||
encoding: "utf-16le",
|
||||
input: []byte("h\x00o\x00w\x00d\x00y\x00"),
|
||||
expected: []byte("howdy"),
|
||||
},
|
||||
{
|
||||
name: "utf-16le decoder with BOM",
|
||||
encoding: "utf-16le",
|
||||
input: []byte("\xff\xfeh\x00o\x00w\x00d\x00y\x00"),
|
||||
expected: []byte("\xef\xbb\xbfhowdy"),
|
||||
},
|
||||
{
|
||||
name: "utf-16be decoder no BOM",
|
||||
encoding: "utf-16be",
|
||||
input: []byte("\x00h\x00o\x00w\x00d\x00y"),
|
||||
expected: []byte("howdy"),
|
||||
},
|
||||
{
|
||||
name: "utf-16be decoder with BOM",
|
||||
encoding: "utf-16be",
|
||||
input: []byte("\xfe\xff\x00h\x00o\x00w\x00d\x00y"),
|
||||
expected: []byte("\xef\xbb\xbfhowdy"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
decoder, err := NewDecoder(tt.encoding)
|
||||
require.NoError(t, err)
|
||||
buf := bytes.NewBuffer(tt.input)
|
||||
r := decoder.Reader(buf)
|
||||
actual, err := io.ReadAll(r)
|
||||
if tt.expectedErr {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, tt.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue