1
0
Fork 0

Adding upstream version 0.0~git20231022.c6c9f9a.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-05-22 11:58:01 +02:00
parent dc12642887
commit 0e47c428eb
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
9 changed files with 585 additions and 0 deletions

36
.github/workflows/main.yml vendored Normal file
View file

@ -0,0 +1,36 @@
# CI workflow: formatting check, go vet, staticcheck and unit tests,
# triggered by every push and every pull request.
on: [push, pull_request]
name: Test
jobs:
test:
strategy:
matrix:
# 1.18.x matches the "go 1.18" directive in go.mod; 1.x tracks the newest release.
go-version: [1.18.x, 1.x]
os: [ubuntu-latest]
# The value is passed to GOARCH below. "386" gives a 32-bit build, which
# exercises the decoder without its 64-bit fast path.
# NOTE(review): "" is presumably meant to keep the runner's native architecture - confirm.
arch: ["", "386"]
# Let the remaining matrix entries finish even if one of them fails.
fail-fast: false
runs-on: ${{ matrix.os }}
steps:
- name: Install Go
uses: actions/setup-go@v4
with:
go-version: ${{ matrix.go-version }}
- name: Checkout code
uses: actions/checkout@v4
# The diff against empty input fails the step as soon as gofmt -d prints anything.
- name: Check formatting
run: diff -u <(echo -n) <(gofmt -d .)
if: runner.os != 'Windows'
- name: Run go vet
env:
GOARCH: ${{ matrix.arch }}
run: go vet ./...
# staticcheck runs only for the "1.x" matrix entry and reuses the Go
# toolchain installed above (install-go: false).
- name: Run staticcheck
uses: dominikh/staticcheck-action@v1.3.0
with:
version: "2023.1.6"
install-go: false
cache-key: ${{ matrix.go-version }}
if: ${{ matrix.go-version == '1.x' }}
# vet already ran as its own step, so it is switched off for the test run.
- name: Run tests
env:
GOARCH: ${{ matrix.arch }}
run: go test -vet=off ./...

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
.idea
*.iml

29
LICENSE Normal file
View file

@ -0,0 +1,29 @@
Copyright (c) 2023 Dmitry Panov. All rights reserved.
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

63
README.md Normal file
View file

@ -0,0 +1,63 @@
# Universal Lenient Base64 Decoder
This is a modified version of the standard base64 decoder from `encoding/base64`. Key differences:
- Accepts both Standard and URL-safe encoding, no need to specify it explicitly.
- Works for both padded and raw inputs.
- The input can be a `string` or a `[]byte`, no allocation or copy is performed for either.
The goal was to create a decoder compatible with the one used in nodejs (and possibly elsewhere too). Unfortunately
the standard package does not expose enough API to do this efficiently, and trying all 4 possible variants
(standard/url, padded/raw) seemed wasteful.
Because there is no need to convert the input from `string` to `[]byte`, decoding strings (especially long ones)
is more efficient than with the standard library. Below is the comparison between `BenchmarkDecodeBase64Std`
and `BenchmarkDecodeBase64` on go1.21.3:
```
goos: linux
goarch: amd64
pkg: base64dec
cpu: Intel(R) Core(TM) i7-2600S CPU @ 2.80GHz
│ old.txt │ new.txt │
│ sec/op │ sec/op vs base │
DecodeBase64/2-8 28.85n ± 3% 22.95n ± 2% -20.45% (p=0.000 n=10)
DecodeBase64/4-8 48.05n ± 0% 41.41n ± 1% -13.83% (p=0.000 n=10)
DecodeBase64/8-8 38.31n ± 2% 29.55n ± 1% -22.89% (p=0.000 n=10)
DecodeBase64/64-8 406.7n ± 5% 113.0n ± 2% -72.20% (p=0.000 n=10)
DecodeBase64/8192-8 31.670µ ± 10% 9.995µ ± 2% -68.44% (p=0.000 n=10)
geomean 232.8n 126.0n -45.89%
│ old.txt │ new.txt │
│ B/s │ B/s vs base │
DecodeBase64/2-8 132.2Mi ± 3% 166.2Mi ± 2% +25.70% (p=0.000 n=10)
DecodeBase64/4-8 158.8Mi ± 0% 184.2Mi ± 1% +16.04% (p=0.000 n=10)
DecodeBase64/8-8 298.7Mi ± 2% 387.3Mi ± 1% +29.67% (p=0.000 n=10)
DecodeBase64/64-8 206.4Mi ± 5% 742.4Mi ± 2% +259.71% (p=0.000 n=10)
DecodeBase64/8192-8 329.0Mi ± 11% 1042.4Mi ± 2% +216.87% (p=0.000 n=10)
geomean 211.8Mi 391.4Mi +84.81%
│ old.txt │ new.txt │
│ B/op │ B/op vs base │
DecodeBase64/2-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
DecodeBase64/4-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
DecodeBase64/8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
DecodeBase64/64-8 96.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10)
DecodeBase64/8192-8 12.00Ki ± 0% 0.00Ki ± 0% -100.00% (p=0.000 n=10)
geomean ² ? ² ³
¹ all samples are equal
² summaries must be >0 to compute geomean
³ ratios must be >0 to compute geomean
│ old.txt │ new.txt │
│ allocs/op │ allocs/op vs base │
DecodeBase64/2-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
DecodeBase64/4-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
DecodeBase64/8-8 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
DecodeBase64/64-8 1.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10)
DecodeBase64/8192-8 1.000 ± 0% 0.000 ± 0% -100.00% (p=0.000 n=10)
geomean ² ? ² ³
¹ all samples are equal
² summaries must be >0 to compute geomean
³ ratios must be >0 to compute geomean
```

22
decode_map.go Normal file
View file

@ -0,0 +1,22 @@
// Code generated by generate_decode_map.go. DO NOT EDIT.
package base64dec
// decodeMap maps every possible input byte to its 6-bit base64 value, or to
// 0xff when the byte is not a base64 digit. It covers the standard and the
// URL-safe alphabets at the same time: '+' and '-' both map to 0x3e, '/' and
// '_' both map to 0x3f.
var decodeMap = [256]byte{
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0x3e, 0xff, 0x3f,
0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0x3f,
0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
}

223
decoder.go Normal file
View file

@ -0,0 +1,223 @@
//go:generate go run generate_decode_map.go
// Package base64dec contains a universal base64 decoder that works on both the standard and url-safe variants, padded and raw.
// The code is based on the standard encoding/base64 package.
package base64dec
import (
"encoding/base64"
"encoding/binary"
"strconv"
)
// padChar is the padding character that may terminate encoded input.
const padChar = '='
// ByteSeq is the type constraint for the encoded input of DecodeBase64:
// either a byte slice or a string.
type ByteSeq interface {
[]byte | string
}
// DecodeBase64 decodes the base64 text in src into dst and returns the number
// of decoded bytes stored in dst.
//
// The input may use the standard or the URL-safe alphabet (they can even be
// mixed) and may be padded or un-padded. Carriage returns and line feeds are
// ignored.
//
// Padding rules:
//   - correct padding is consumed and produces no error;
//   - missing padding produces no error;
//   - wrong padding (too many or too few characters) is treated as trailing
//     garbage, i.e. the error refers to the first padding character.
//
// When src holds invalid data, the returned count covers the bytes decoded
// before the problem and err is a base64.CorruptInputError.
//
// At most base64.RawStdEncoding.DecodedLen(len(src)) bytes are reported, so a
// dst of that size is sufficient. The fast paths store whole 8- or 4-byte
// words, which means up to two bytes of dst beyond the returned count may be
// overwritten when dst is larger than needed.
func DecodeBase64[T ByteSeq](dst []byte, src T) (n int, err error) {
	if len(src) == 0 {
		return 0, nil
	}

	pos := 0 // index of the next unread byte of src

	// Fast path for 64-bit platforms: turn 8 digits into 6 bytes at a time.
	for strconv.IntSize >= 64 && len(src)-pos >= 8 && len(dst)-n >= 8 {
		chunk := src[pos : pos+8]
		word, ok := assemble64(
			decodeMap[chunk[0]],
			decodeMap[chunk[1]],
			decodeMap[chunk[2]],
			decodeMap[chunk[3]],
			decodeMap[chunk[4]],
			decodeMap[chunk[5]],
			decodeMap[chunk[6]],
			decodeMap[chunk[7]],
		)
		if ok {
			binary.BigEndian.PutUint64(dst[n:], word)
			n += 6
			pos += 8
			continue
		}

		// The chunk holds something other than plain digits (newline,
		// padding or garbage); let the careful decoder sort it out.
		var wrote int
		pos, wrote, err = decodeQuantum(dst[n:], src, pos)
		n += wrote
		if err != nil {
			return n, err
		}
	}

	// Same idea with 4 digits -> 3 bytes, usable on every platform.
	for len(src)-pos >= 4 && len(dst)-n >= 4 {
		chunk := src[pos : pos+4]
		word, ok := assemble32(
			decodeMap[chunk[0]],
			decodeMap[chunk[1]],
			decodeMap[chunk[2]],
			decodeMap[chunk[3]],
		)
		if ok {
			binary.BigEndian.PutUint32(dst[n:], word)
			n += 3
			pos += 4
			continue
		}

		var wrote int
		pos, wrote, err = decodeQuantum(dst[n:], src, pos)
		n += wrote
		if err != nil {
			return n, err
		}
	}

	// Whatever is left (a short tail, or a dst too tight for whole-word
	// stores) is decoded one quantum at a time.
	for pos < len(src) {
		var wrote int
		pos, wrote, err = decodeQuantum(dst[n:], src, pos)
		n += wrote
		if err != nil {
			return n, err
		}
	}

	return n, nil
}
// assemble32 packs four 6-bit base64 digits into the three most significant
// bytes of a uint32 (the lowest byte is left zero).
// The digits are decode map values, where 0xff denotes an invalid input
// character; ok is false when the digits fail the validity check.
func assemble32(n1, n2, n3, n4 byte) (dn uint32, ok bool) {
	// OR-ing the digits together yields 0xff as soon as one of them is the
	// 0xff "invalid" marker.
	if n1|n2|n3|n4 == 0xff {
		return 0, false
	}

	// Shift the digits in one after another, then left-align the 24 bits.
	dn = uint32(n1)
	dn = dn<<6 | uint32(n2)
	dn = dn<<6 | uint32(n3)
	dn = dn<<6 | uint32(n4)
	return dn << 8, true
}
// assemble64 packs eight 6-bit base64 digits into the six most significant
// bytes of a uint64 (the two lowest bytes are left zero).
// The digits are decode map values, where 0xff denotes an invalid input
// character; ok is false when the digits fail the validity check.
func assemble64(n1, n2, n3, n4, n5, n6, n7, n8 byte) (dn uint64, ok bool) {
	// OR-ing the digits together yields 0xff as soon as one of them is the
	// 0xff "invalid" marker.
	if n1|n2|n3|n4|n5|n6|n7|n8 == 0xff {
		return 0, false
	}

	// Shift the digits in one after another, then left-align the 48 bits.
	dn = uint64(n1)
	dn = dn<<6 | uint64(n2)
	dn = dn<<6 | uint64(n3)
	dn = dn<<6 | uint64(n4)
	dn = dn<<6 | uint64(n5)
	dn = dn<<6 | uint64(n6)
	dn = dn<<6 | uint64(n7)
	dn = dn<<6 | uint64(n8)
	return dn << 16, true
}
// decodeQuantum decodes one quantum, i.e. up to 4 base64 digits, starting at
// index si of src and stores the resulting bytes (at most 3) at the start of
// dst. Newlines (\r and \n) between digits are skipped. Padding and the
// trailing data that follows it are validated here as well.
//
// It returns:
//   - nsi: the index in src at which decoding should continue,
//   - n:   the number of bytes written to dst,
//   - err: a base64.CorruptInputError holding the offset of the offending
//     input byte, or nil.
//
// Bytes decoded from the digits preceding an error are still written and
// counted in n. A lone trailing digit at the very end of src carries fewer
// than 8 bits and is dropped without an error.
func decodeQuantum[T ByteSeq](dst []byte, src T, si int) (nsi, n int, err error) {
	// Decode quantum using the base64 alphabet
	var dbuf [4]byte
	dlen := 4

	for j := 0; j < len(dbuf); j++ {
		if len(src) == si {
			if j == 0 {
				return si, 0, nil
			}
			dlen = j
			break
		}
		in := src[si]
		si++

		out := decodeMap[in]
		if out != 0xff {
			dbuf[j] = out
			continue
		}

		if in == '\n' || in == '\r' {
			// Newlines do not count as digits: retry the same slot.
			j--
			continue
		}

		// Anything else ends the quantum after j digits.
		dlen = j
		// Remember where the offending character sits. si may still move
		// past newlines below, but errors must refer to this position.
		badPos := si - 1

		if rune(in) != padChar {
			err = base64.CorruptInputError(badPos)
			break
		}

		// We've reached the end and there's padding
		switch j {
		case 0, 1:
			// incorrect padding
			err = base64.CorruptInputError(badPos)
		case 2:
			// "==" is expected, the first "=" is already consumed.
			// skip over newlines
			for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
				si++
			}
			if si == len(src) || rune(src[si]) != padChar {
				// Not enough or incorrect padding: report the first
				// padding character, even if newlines followed it.
				err = base64.CorruptInputError(badPos)
			} else {
				si++
			}
		}

		if err == nil {
			// skip over newlines
			for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
				si++
			}
			if si < len(src) {
				// trailing garbage
				err = base64.CorruptInputError(si)
			}
		}
		break
	}

	if dlen == 0 {
		return si, 0, err
	}

	// Convert 4x 6bit source bytes into 3 bytes
	val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3])
	// dlen digits carry dlen-1 complete bytes; a single digit carries none.
	switch dlen {
	case 4:
		dst[2] = byte(val)
		fallthrough
	case 3:
		dst[1] = byte(val >> 8)
		fallthrough
	case 2:
		dst[0] = byte(val >> 16)
	}

	return si, dlen - 1, err
}

149
decoder_test.go Normal file
View file

@ -0,0 +1,149 @@
package base64dec
import (
"bytes"
"encoding/base64"
"fmt"
"testing"
)
// TestDecodeBase64 runs DecodeBase64 over a table of string inputs covering
// both alphabets, newlines, and correct, missing and malformed padding.
// For every case it checks the returned error and the decoded bytes. The
// decoded bytes are compared after re-encoding them with base64.StdEncoding,
// so the want column stays readable.
func TestDecodeBase64(t *testing.T) {
	tests := []struct {
		name    string
		input   string
		want    string // decoded bytes, re-encoded with base64.StdEncoding
		wantErr error
	}{
		{"empty input", "", "", nil},
		{"newlines only", "\r\n", "", nil},
		{"correct padding", "Z9CA-w==", "Z9CA+w==", nil},
		{"correct padding split by newline", "Z9CA-w=\n=", "Z9CA+w==", nil},
		{"correct padding with concatenation", "Z9CA-w==Z9CA-w==", "Z9CA+w==", base64.CorruptInputError(8)},
		{"trailing newline", "Z9CA+wZ9CA-w\n", "Z9CA+wZ9CA+w", nil},
		{"trailing newline with padding", "Z9CA+wZ9CA-www==\n", "Z9CA+wZ9CA+www==", nil},
		{"garbage after newline", "Z9CA+wZ9CA-www==\n?", "Z9CA+wZ9CA+www==", base64.CorruptInputError(17)},
		{"no padding", "Z9CA-w", "Z9CA+w==", nil},
		{"no padding, garbage at the end", "Z9CA-w???", "Z9CA+w==", base64.CorruptInputError(6)},
		{"not enough padding", "Z9CA-w=", "Z9CA+w==", base64.CorruptInputError(6)},
		{"incorrect padding", "Z9CA====", "Z9CA", base64.CorruptInputError(4)},
		{"incorrect padding with extra base64", "Z9CA-w=Z9CA-w=", "Z9CA+w==", base64.CorruptInputError(6)},
		{"incorrect padding with garbage", "Z9CA-w=???", "Z9CA+w==", base64.CorruptInputError(6)},
	}

	for _, tc := range tests {
		// t.Run executes the subtest synchronously, so using tc inside the
		// closure is safe on every Go version.
		t.Run(tc.name, func(t *testing.T) {
			dst := make([]byte, base64.RawStdEncoding.DecodedLen(len(tc.input)))
			n, err := DecodeBase64(dst, tc.input)
			if err != tc.wantErr {
				t.Fatalf("DecodeBase64(%q): got error %v, want %v", tc.input, err, tc.wantErr)
			}
			if got := base64.StdEncoding.EncodeToString(dst[:n]); got != tc.want {
				t.Fatalf("DecodeBase64(%q): decoded bytes re-encode to %q, want %q", tc.input, got, tc.want)
			}
		})
	}
}
// FuzzDecodeBase64 is a round-trip fuzz test: arbitrary bytes are encoded with
// base64.StdEncoding and must decode back to exactly the same bytes without an
// error, using a destination that is only as large as the original data.
func FuzzDecodeBase64(f *testing.F) {
	seeds := [][]byte{
		{},
		[]byte("\x14\xfb\x9c\x03\xd9\x7e"),
		[]byte("\x14\xfb\x9c\x03\xd9"),
		[]byte("\x14\xfb\x9c\x03"),
	}
	for _, seed := range seeds {
		f.Add(seed)
	}

	f.Fuzz(func(t *testing.T, raw []byte) {
		text := base64.StdEncoding.EncodeToString(raw)
		out := make([]byte, len(raw))

		n, err := DecodeBase64(out, text)
		if err != nil {
			t.Fatalf("%v: %v", raw, err)
		}
		if !bytes.Equal(out[:n], raw) {
			t.Fatal(raw)
		}
	})
}
// FuzzDecodeBase64String feeds arbitrary strings to DecodeBase64. The result
// is deliberately ignored: the only requirement is that decoding never panics
// when dst has base64.RawStdEncoding.DecodedLen(len(s)) bytes.
func FuzzDecodeBase64String(f *testing.F) {
	for _, seed := range []string{"Z9CA-w", "=\n=", "=", "===="} {
		f.Add(seed)
	}

	f.Fuzz(func(t *testing.T, s string) {
		out := make([]byte, base64.RawStdEncoding.DecodedLen(len(s)))
		_, _ = DecodeBase64(out, s) // should not panic
	})
}
// BenchmarkDecodeBase64 measures DecodeBase64 on string input. Each
// sub-benchmark is named after the size in bytes of the all-zero payload that
// was encoded; throughput is reported relative to the encoded length.
func BenchmarkDecodeBase64(b *testing.B) {
	// One destination buffer, large enough for the biggest payload.
	out := make([]byte, 8192)

	for _, size := range []int{2, 4, 8, 64, 8192} {
		b.Run(fmt.Sprintf("%d", size), func(b *testing.B) {
			encoded := base64.StdEncoding.EncodeToString(make([]byte, size))
			b.SetBytes(int64(len(encoded)))
			b.ResetTimer()
			b.ReportAllocs()
			for i := 0; i < b.N; i++ {
				DecodeBase64(out, encoded)
			}
		})
	}
}
// BenchmarkDecodeBase64Std is the baseline for BenchmarkDecodeBase64: the same
// inputs decoded with base64.StdEncoding.Decode. The string-to-[]byte
// conversion stays inside the timed loop on purpose, because it is part of the
// cost of decoding a string with the standard library.
func BenchmarkDecodeBase64Std(b *testing.B) {
	// One destination buffer, large enough for the biggest payload.
	out := make([]byte, 8192)

	for _, size := range []int{2, 4, 8, 64, 8192} {
		b.Run(fmt.Sprintf("%d", size), func(b *testing.B) {
			encoded := base64.StdEncoding.EncodeToString(make([]byte, size))
			b.SetBytes(int64(len(encoded)))
			b.ResetTimer()
			b.ReportAllocs()
			for i := 0; i < b.N; i++ {
				base64.StdEncoding.Decode(out, []byte(encoded))
			}
		})
	}
}

58
generate_decode_map.go Normal file
View file

@ -0,0 +1,58 @@
//go:build ignore
package main
import (
"bytes"
"fmt"
"log"
"os"
)
// generateDecodeMap builds the byte -> 6-bit value lookup table used by the
// decoder. Bytes outside the base64 alphabets map to 0xff. The URL-safe
// characters '-' and '_' get the same values as '+' and '/', so one table
// serves both alphabets.
func generateDecodeMap() [256]byte {
	const stdAlphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"

	var table [256]byte
	// Start with every byte marked invalid.
	for i := range table {
		table[i] = 0xff
	}
	// The position of a character in the alphabet is its 6-bit value.
	for value, char := range []byte(stdAlphabet) {
		table[char] = byte(value)
	}

	// for URL encoding
	table['-'] = table['+']
	table['_'] = table['/']

	return table
}
// main regenerates decode_map.go in the current directory: a Go source file
// for package base64dec that declares the decodeMap lookup table, written as
// 16 hexadecimal values per line.
//
// The complete file is rendered in memory first and written in one step, so
// the existing decode_map.go is only touched once the new content is ready.
func main() {
	table := generateDecodeMap()

	var b bytes.Buffer
	b.WriteString("// Code generated by generate_decode_map.go. DO NOT EDIT.\n\n")
	b.WriteString("package base64dec\n\n")
	b.WriteString("var decodeMap = [256]byte{")
	for i, v := range table {
		// Start a new indented line every 16 values, otherwise separate
		// the values with a single space.
		if i%16 == 0 {
			b.WriteString("\n\t")
		} else {
			b.WriteByte(' ')
		}
		fmt.Fprintf(&b, "0x%02x,", v)
	}
	b.WriteString("\n}\n")

	// os.WriteFile creates or truncates the file, writes the data and
	// closes the file, reporting the first error it encounters.
	if err := os.WriteFile("decode_map.go", b.Bytes(), 0644); err != nil {
		log.Fatal(err)
	}
}

3
go.mod Normal file
View file

@ -0,0 +1,3 @@
module github.com/dop251/base64dec
go 1.18