1
0
Fork 0

Adding upstream version 2.5.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-05-19 00:20:02 +02:00
parent c71cb8b61d
commit 982828099e
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
783 changed files with 150650 additions and 0 deletions

View file

@ -0,0 +1,3 @@
# javadata
Go library to read data written with java.io.DataOutput

View file

@ -0,0 +1,34 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build gofuzz
// +build gofuzz
package javadata
import "bytes"
// Fuzz is the go-fuzz entry point for this package. It treats data as a
// sequence of back-to-back strings and decodes them with ReadUTF until the
// reader reports an error (which includes io.EOF once the input is used up).
//
// It returns 1 when at least one string was decoded successfully, so the
// fuzzer gives that input higher priority, and 0 when nothing could be
// decoded at all.
func Fuzz(data []byte) int {
	jdr := NewReader(bytes.NewReader(data))

	// Every input eventually ends in an error (at the latest io.EOF), so the
	// terminating error cannot be used to score the input; count successes
	// instead.
	decoded := 0
	for {
		if _, err := jdr.ReadUTF(); err != nil {
			break
		}
		decoded++
	}

	if decoded > 0 {
		return 1
	}
	return 0
}

View file

@ -0,0 +1,135 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package javadata
import (
"bufio"
"encoding/binary"
"fmt"
"io"
)
// ErrMalformedInput is returned by ReadUTF when the string payload is not
// validly encoded: an illegal lead byte, a bad continuation byte, or a
// multi-byte sequence that runs past the declared length.
var ErrMalformedInput = fmt.Errorf("malformed input")
// Reader decodes Java-serialized primitive values (as written with
// java.io.DataOutput) from an underlying stream.
type Reader struct {
	// r is the buffered wrapper around the stream given to NewReader; every
	// Read* method consumes bytes from it.
	r *bufio.Reader
}
// NewReader returns a Reader that decodes Java data from r. The source is
// wrapped in a bufio.Reader, so reads from r are buffered.
func NewReader(r io.Reader) *Reader {
	buffered := bufio.NewReader(r)
	return &Reader{r: buffered}
}
// ReadBool reads a single byte from the stream and reports whether it is
// non-zero. If the byte cannot be read, it returns false and the read error.
func (r *Reader) ReadBool() (bool, error) {
	value, err := r.r.ReadByte()
	if err == nil {
		return value != 0, nil
	}
	return false, err
}
// ReadInt32 reads four bytes from the stream and interprets them as a
// big-endian signed 32-bit integer. If four bytes cannot be read, it returns
// zero and the read error.
func (r *Reader) ReadInt32() (rv int32, err error) {
	var raw [4]byte
	if _, err = io.ReadFull(r.r, raw[:]); err != nil {
		return 0, err
	}
	return int32(binary.BigEndian.Uint32(raw[:])), nil
}
// ReadUint16 reads two bytes from the stream and interprets them as a
// big-endian unsigned 16-bit integer. If two bytes cannot be read, it returns
// zero and the read error.
func (r *Reader) ReadUint16() (rv uint16, err error) {
	var raw [2]byte
	if _, err = io.ReadFull(r.r, raw[:]); err != nil {
		return 0, err
	}
	return binary.BigEndian.Uint16(raw[:]), nil
}
// ReadCharAsRune reads a two-byte, big-endian Java char from the stream and
// returns its 16-bit value as a rune. If the char cannot be read, it returns
// zero and the read error.
func (r *Reader) ReadCharAsRune() (rv rune, err error) {
	var unit uint16
	if err = binary.Read(r.r, binary.BigEndian, &unit); err != nil {
		return 0, err
	}
	return rune(unit), nil
}
// ReadUTF attempts to read a UTF-encoded string from the stream.
// This method follows the specific alternate ("modified UTF-8") encoding
// described here:
// https://docs.oracle.com/javase/7/docs/api/java/io/DataInput.html
//
// The wire format is a big-endian unsigned 16-bit byte count followed by that
// many payload bytes, where each char is encoded in one, two or three bytes.
// Each decoded char becomes one rune of the result; surrogate halves are not
// combined into a single code point.
//
// It returns the underlying read error if the length prefix or the payload
// cannot be read in full, and ErrMalformedInput if the payload contains an
// illegal lead byte, a bad continuation byte, or a sequence that is cut off
// by the end of the payload.
func (r *Reader) ReadUTF() (string, error) {
	utfLen, err := r.ReadUint16()
	if err != nil {
		return "", err
	}
	buf := make([]byte, utfLen)
	if _, err = io.ReadFull(r.r, buf); err != nil {
		return "", err
	}

	// Every char takes at least one payload byte, so len(buf) bounds the
	// number of runes.
	runes := make([]rune, 0, len(buf))

	// The position is an int rather than a uint16: utfLen can be 65535, and
	// advancing a 16-bit index by 2 or 3 near the end of such a payload would
	// wrap around and defeat the truncation checks below.
	pos := 0

	// Fast path: copy the leading run of plain ASCII bytes.
	for pos < len(buf) && buf[pos] <= 127 {
		runes = append(runes, rune(buf[pos]))
		pos++
	}

	// General path: dispatch on the high nibble of the lead byte.
	for pos < len(buf) {
		c := rune(buf[pos])
		switch c >> 4 {
		case 0, 1, 2, 3, 4, 5, 6, 7:
			/* 0xxx xxxx */
			runes = append(runes, c)
			pos++
		case 12, 13:
			/* 110x xxxx 10xx xxxx */
			if pos+2 > len(buf) {
				return "", ErrMalformedInput
			}
			char2 := rune(buf[pos+1])
			if char2&0xC0 != 0x80 {
				return "", ErrMalformedInput
			}
			runes = append(runes, (c&0x1F)<<6|char2&0x3F)
			pos += 2
		case 14:
			/* 1110 xxxx 10xx xxxx 10xx xxxx */
			if pos+3 > len(buf) {
				return "", ErrMalformedInput
			}
			char2 := rune(buf[pos+1])
			char3 := rune(buf[pos+2])
			if char2&0xC0 != 0x80 || char3&0xC0 != 0x80 {
				return "", ErrMalformedInput
			}
			runes = append(runes, (c&0x0F)<<12|(char2&0x3F)<<6|char3&0x3F)
			pos += 3
		default:
			/* 10xx xxxx, 1111 xxxx: not a legal lead byte */
			return "", ErrMalformedInput
		}
	}
	return string(runes), nil
}

View file

@ -0,0 +1,249 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package javadata
import (
"bytes"
"io"
"testing"
)
// TestReadBool checks that ReadBool maps a zero byte to false, any non-zero
// byte to true, and reports io.EOF on empty input.
func TestReadBool(t *testing.T) {
	tests := []struct {
		in  []byte
		out bool
		err error
	}{
		{
			in:  []byte{0},
			out: false,
		},
		{
			in:  []byte{1},
			out: true,
		},
		{
			in:  []byte{27},
			out: true,
		},
		{
			in:  []byte{},
			err: io.EOF,
		},
	}
	for _, test := range tests {
		t.Run(string(test.in), func(t *testing.T) {
			dr := NewReader(bytes.NewReader(test.in))
			actual, err := dr.ReadBool()
			if err != test.err {
				// Report both sides so a missing error does not show up as a bare "<nil>".
				t.Errorf("expected error %v, got %v", test.err, err)
			}
			if actual != test.out {
				t.Errorf("expected %t, got %t", test.out, actual)
			}
		})
	}
}
// TestReadUint16 checks that ReadUint16 decodes two bytes as a big-endian
// unsigned integer and reports io.EOF on empty input.
func TestReadUint16(t *testing.T) {
	tests := []struct {
		in  []byte
		out uint16
		err error
	}{
		{
			in:  []byte{0, 0},
			out: 0,
		},
		{
			in:  []byte{0, 1},
			out: 1,
		},
		{
			in:  []byte{1, 0},
			out: 256,
		},
		{
			in:  []byte{},
			err: io.EOF,
		},
	}
	for _, test := range tests {
		t.Run(string(test.in), func(t *testing.T) {
			dr := NewReader(bytes.NewReader(test.in))
			actual, err := dr.ReadUint16()
			if err != test.err {
				// Report both sides so a missing error does not show up as a bare "<nil>".
				t.Errorf("expected error %v, got %v", test.err, err)
			}
			if actual != test.out {
				t.Errorf("expected %d, got %d", test.out, actual)
			}
		})
	}
}
// TestReadInt32 checks that ReadInt32 decodes four bytes as a big-endian
// signed integer and reports io.EOF on empty input.
func TestReadInt32(t *testing.T) {
	tests := []struct {
		in  []byte
		out int32
		err error
	}{
		{
			in:  []byte{0, 0, 0, 0},
			out: 0,
		},
		{
			in:  []byte{0, 0, 0, 1},
			out: 1,
		},
		{
			in:  []byte{0, 0, 1, 0},
			out: 256,
		},
		{
			in:  []byte{0, 1, 0, 0},
			out: 65536,
		},
		{
			in:  []byte{},
			err: io.EOF,
		},
	}
	for _, test := range tests {
		t.Run(string(test.in), func(t *testing.T) {
			dr := NewReader(bytes.NewReader(test.in))
			actual, err := dr.ReadInt32()
			if err != test.err {
				// Report both sides so a missing error does not show up as a bare "<nil>".
				t.Errorf("expected error %v, got %v", test.err, err)
			}
			if actual != test.out {
				t.Errorf("expected %d, got %d", test.out, actual)
			}
		})
	}
}
// TestReadUTF checks ReadUTF against well-formed one-, two- and three-byte
// sequences, short reads (io.EOF), and malformed payloads
// (ErrMalformedInput).
func TestReadUTF(t *testing.T) {
	tests := []struct {
		in  []byte
		out string
		err error
	}{
		{
			in:  []byte{0, 3, 'c', 'a', 't'},
			out: "cat",
		},
		{
			in:  []byte{0, 2, 0xc2, 0xa3},
			out: "£",
		},
		{
			in:  []byte{0, 3, 0xe3, 0x85, 0x85},
			out: "ㅅ",
		},
		{
			in:  []byte{0, 6, 0xe3, 0x85, 0x85, 'c', 'a', 't'},
			out: "ㅅcat",
		},
		{
			in:  []byte{},
			err: io.EOF,
		},
		{
			in:  []byte{0, 3},
			err: io.EOF,
		},
		{
			in:  []byte{0, 1, 0xc2},
			err: ErrMalformedInput,
		},
		{
			in:  []byte{0, 2, 0xc2, 0xc3},
			err: ErrMalformedInput,
		},
		{
			in:  []byte{0, 2, 0xe3, 0x85},
			err: ErrMalformedInput,
		},
		{
			in:  []byte{0, 3, 0xe3, 0xc5, 0x85},
			err: ErrMalformedInput,
		},
		{
			in:  []byte{0, 1, 0xff},
			err: ErrMalformedInput,
		},
		{
			in:  []byte{0x0, 0x05, 0x44, 0x61, 0x52, 0xc4, 0x87},
			out: "DaRć",
		},
	}
	for _, test := range tests {
		t.Run(string(test.in), func(t *testing.T) {
			dr := NewReader(bytes.NewReader(test.in))
			actual, err := dr.ReadUTF()
			if err != test.err {
				// Report both sides so a missing error does not show up as a bare "<nil>".
				t.Errorf("expected error %v, got %v", test.err, err)
			}
			if actual != test.out {
				// %q keeps empty and non-printable strings visible in the failure output.
				t.Errorf("expected %q, got %q", test.out, actual)
			}
		})
	}
}
// func TestFile(t *testing.T) {
// f, err := os.Open("stemmer_20000.tbl")
// if err != nil {
// t.Fatal(err)
// }
// r := NewReader(f)
// reversed, err := r.ReadBool()
// if err != nil {
// t.Fatal(err)
// }
// log.Printf("reversed: %t", reversed)
// root, err := r.ReadInt32()
// if err != nil {
// t.Fatal(err)
// }
// log.Printf("root: %d", root)
// n, err := r.ReadInt32()
// if err != nil {
// t.Fatal(err)
// }
// log.Printf("n is %d", n)
// // for n > 0 {
// // utf, err := r.ReadUTF()
// // if err != nil {
// // t.Error(err)
// // }
// // log.Printf("read: %s", utf)
// // n--
// // }
// }