diff --git a/.build.yml b/.build.yml new file mode 100644 index 0000000..934da9f --- /dev/null +++ b/.build.yml @@ -0,0 +1,18 @@ +image: archlinux +packages: + - go + - postgresql +sources: + - https://github.com/go-ap/jsonld +environment: + GO111MODULE: 'on' +tasks: + - setup: | + cd jsonld && go mod download + - tests: | + cd jsonld && make test + - coverage: | + set -a +x + cd jsonld && make coverage + GIT_SHA=$(git rev-parse --verify HEAD) + GIT_BRANCH=$(git name-rev --name-only HEAD) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..42d9ac4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +# Gogland +.idea/ + +# Binaries for programs and plugins +*.so + +# Test binary, build with `go test -c` +*.test + +# Output of the go coverage tools +*.out +*.coverprofile + +*pkg diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2ff2786 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 Marius Orcsik + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..6682028 --- /dev/null +++ b/Makefile @@ -0,0 +1,18 @@ +TEST := go test +TEST_FLAGS ?= -v +TEST_TARGET ?= ./... +GO111MODULE=on +PROJECT_NAME := $(shell basename $(PWD)) + +.PHONY: test coverage clean + +test: + $(TEST) $(TEST_FLAGS) $(TEST_TARGET) + +coverage: TEST_TARGET := . +coverage: TEST_FLAGS += -covermode=count -coverprofile $(PROJECT_NAME).coverprofile +coverage: test + +clean: + $(RM) -v *.coverprofile + diff --git a/README.md b/README.md new file mode 100644 index 0000000..d423c38 --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +# JSON-ld for Go + +[![MIT Licensed](https://img.shields.io/github/license/go-ap/jsonld.svg)](https://raw.githubusercontent.com/go-ap/jsonld/master/LICENSE) +[![Build Status](https://builds.sr.ht/~mariusor/jsonld.svg)](https://builds.sr.ht/~mariusor/jsonld) +[![Test Coverage](https://img.shields.io/codecov/c/github/go-ap/jsonld.svg)](https://codecov.io/gh/go-ap/jsonld) +[![Go Report Card](https://goreportcard.com/badge/github.com/go-ap/jsonld)](https://goreportcard.com/report/github.com/go-ap/jsonld) + + +Basic lib for using [activity pub](https://www.w3.org/TR/activitypub/#Overview) API in Go. + +## Usage + +```go +import "github.com/go-ap/jsonld" +``` diff --git a/context.go b/context.go new file mode 100644 index 0000000..ac36e86 --- /dev/null +++ b/context.go @@ -0,0 +1,219 @@ +package jsonld + +import ( + "encoding/json" + "strings" +) + +// From the JSON-LD spec 3.3 +// https://www.w3.org/TR/json-ld/#dfn-keyword +const ( + // @context + // Used to define the short-hand names that are used throughout a JSON-LD document. + // These short-hand names are called terms and help developers to express specific identifiers in a compact manner. + // The @context keyword is described in detail in section 5.1 The Context. + ContextKw Term = "@context" + // @id + //Used to uniquely identify things that are being described in the document with IRIs or blank node identifiers. + // This keyword is described in section 5.3 Node Identifiers. + IdKw Term = "@id" + // @value + // Used to specify the data that is associated with a particular property in the graph. + // This keyword is described in section 6.9 String Internationalization and section 6.4 Typed Values. + ValueKw Term = "@value" + // @language + // Used to specify the language for a particular string value or the default language of a JSON-LD document. + // This keyword is described in section 6.9 String Internationalization. + LanguageKw Term = "@language" + //@type + //Used to set the data type of a node or typed value. This keyword is described in section 6.4 Typed Values. + TypeKw Term = "@type" + // @container + // Used to set the default container type for a term. This keyword is described in section 6.11 Sets and Lists. + ContainerKw Term = "@container" + //@list + //Used to express an ordered set of data. This keyword is described in section 6.11 Sets and Lists. + ListKw Term = "@list" + // @set + // Used to express an unordered set of data and to ensure that values are always represented as arrays. + // This keyword is described in section 6.11 Sets and Lists. + SetKw Term = "@set" + // @reverse + // Used to express reverse properties. This keyword is described in section 6.12 Reverse Properties. + ReverseKw Term = "@reverse" + // @index + // Used to specify that a container is used to index information and that processing should continue deeper + // into a JSON data structure. This keyword is described in section 6.16 Data Indexing. + IndexKw Term = "@index" + // @base + // Used to set the base IRI against which relative IRIs are resolved. T + // his keyword is described in section 6.1 Base IRI. + BaseKw Term = "@base" + // @vocab + // Used to expand properties and values in @type with a common prefix IRI. + // This keyword is described in section 6.2 Default Vocabulary. + VocabKw Term = "@vocab" + // @graph + // Used to express a graph. This keyword is described in section 6.13 Named Graphs. + GraphKw Term = "@graph" +) + +// ContentType is the content type of JsonLD documents +const ContentType = `application/ld+json; profile="https://www.w3.org/ns/activitystreams"` + +type ( + // Ref basic type + LangRef string + // Term represents the JSON-LD term for @context maps + Term string + // IRI is a International Resource Identificator + IRI string + // Terms is an array of Term values + Terms []Term +) + +// Nillable +type Nillable interface { + IsNil() bool +} + +type IRILike interface { + IsCompact() bool + IsAbsolute() bool + IsRelative() bool +} + +func (i IRI) IsCompact() bool { + return !i.IsAbsolute() && strings.Contains(string(i), ":") +} +func (i IRI) IsAbsolute() bool { + return strings.Contains(string(i), "https://") +} +func (i IRI) IsRelative() bool { + return !i.IsAbsolute() +} + +var keywords = Terms{ + BaseKw, + ContextKw, + ContainerKw, + GraphKw, + IdKw, + IndexKw, + LanguageKw, + ListKw, + ReverseKw, + SetKw, + TypeKw, + ValueKw, + VocabKw, +} + +const NilTerm Term = "-" +const NilLangRef LangRef = "-" + +type ContextObject struct { + ID interface{} `jsonld:"@id,omitempty,collapsible"` + Type interface{} `jsonld:"@type,omitempty,collapsible"` +} + +// Context is of of the basic JSON-LD elements. +// It represents an array of ContextElements +type Context []ContextElement + +// ContextElement is used to map terms to IRIs or JSON objects. +// Terms are case sensitive and any valid string that is not a reserved JSON-LD +// keyword can be used as a term. +type ContextElement struct { + Term Term + IRI IRI +} + +func GetContext() Context { + return Context{} +} + +//type Context Collapsible + +// Collapsible is an interface used by the JSON-LD marshaller to collapse a struct to one single value +type Collapsible interface { + Collapse() interface{} +} + +// Collapse returns the plain text collapsed value of the current Context object +func (c Context) Collapse() interface{} { + if len(c) == 1 && len(c[0].IRI) > 0 { + return c[0].IRI + } + for _, el := range c { + if el.Term == NilTerm { + + } + } + + return c +} + +// Collapse returns the plain text collapsed value of the current IRI string +func (i IRI) Collapse() interface{} { + return i +} + +// MarshalText basic stringify function +func (i IRI) MarshalText() ([]byte, error) { + return []byte(i), nil +} + +// MarshalJSON returns the JSON document represented by the current Context +// This should return : +// If only one element in the context and the element has no Term -> json marshaled string +// If multiple elements in the context without Term -> json marshaled array of strings +// If multiple elements where at least one doesn't have a Term and one has a Term -> json marshaled array +// If multiple elements where all have Terms -> json marshaled object +func (c Context) MarshalJSON() ([]byte, error) { + mapIRI := make(map[Term]IRI, 0) + arr := make([]interface{}, 0) + i := 0 + if len(c) == 1 && len(c[0].IRI) > 0 { + return json.Marshal(c[0].IRI) + } + for _, el := range c { + t := el.Term + iri := el.IRI + if t.IsNil() { + arr = append(arr, iri) + i += 1 + } else { + if len(iri) > 0 { + mapIRI[t] = iri + } + } + } + if len(mapIRI) > 0 { + if len(arr) == 0 { + return json.Marshal(mapIRI) + } + arr = append(arr, mapIRI) + } + return json.Marshal(arr) +} + +// UnmarshalJSON tries to load the Context from the incoming json value +func (c *Context) UnmarshalJSON(data []byte) error { + return nil +} + +// IsNil returns if current LangRef is equal to empty string or to its nil value +func (l LangRef) IsNil() bool { + return len(l) == 0 || l == NilLangRef +} + +// IsNil returns if current IRI is equal to empty string +func (i IRI) IsNil() bool { + return len(i) == 0 +} + +// IsNil returns if current Term is equal to empty string or to its nil value +func (i Term) IsNil() bool { + return len(i) == 0 || i == NilTerm +} diff --git a/context_test.go b/context_test.go new file mode 100644 index 0000000..94cd6d3 --- /dev/null +++ b/context_test.go @@ -0,0 +1,101 @@ +package jsonld + +import ( + "bytes" + "encoding/json" + "strings" + "testing" +) + +func TestRef_MarshalText(t *testing.T) { + test := "test" + a := IRI(test) + + out, err := a.MarshalText() + if err != nil { + t.Errorf("Error %s", err) + } + if bytes.Compare(out, []byte(test)) != 0 { + t.Errorf("Invalid result '%s', expected '%s'", out, test) + } +} + +func TestContext_MarshalJSON(t *testing.T) { + { + url := "test" + c := Context{{NilTerm, IRI(url)}} + + out, err := c.MarshalJSON() + if err != nil { + t.Errorf("%s", err) + } + if !strings.Contains(string(out), url) { + t.Errorf("Json doesn't contain %s, %s", url, string(out)) + } + jUrl, _ := json.Marshal(url) + if !bytes.Equal(jUrl, out) { + t.Errorf("Strings should be equal %s, %s", jUrl, out) + } + } + { + url := "example.com" + asTerm := "testingTerm##" + asUrl := "https://activitipubrocks.com" + c2 := Context{{NilTerm, IRI(url)}, {Term(asTerm), IRI(asUrl)}} + out, err := c2.MarshalJSON() + if err != nil { + t.Errorf("%s", err) + } + if !strings.Contains(string(out), url) { + t.Errorf("Json doesn't contain URL %s, %s", url, string(out)) + } + if !strings.Contains(string(out), asUrl) { + t.Errorf("Json doesn't contain URL %s, %s", asUrl, string(out)) + } + if !strings.Contains(string(out), asTerm) { + t.Errorf("Json doesn't contain Term %s, %s", asTerm, string(out)) + } + } + { + url := "test" + testTerm := "test_term" + asTerm := "testingTerm##" + asUrl := "https://activitipubrocks.com" + c3 := Context{{Term(testTerm), IRI(url)}, {Term(asTerm), IRI(asUrl)}} + out, err := c3.MarshalJSON() + if err != nil { + t.Errorf("%s", err) + } + if !strings.Contains(string(out), url) { + t.Errorf("Json doesn't contain URL %s, %s", url, string(out)) + } + if !strings.Contains(string(out), asUrl) { + t.Errorf("Json doesn't contain URL %s, %s", asUrl, string(out)) + } + if !strings.Contains(string(out), asTerm) { + t.Errorf("Json doesn't contain Term %s, %s", asTerm, string(out)) + } + if !strings.Contains(string(out), testTerm) { + t.Errorf("Json doesn't contain Term %s, %s", testTerm, string(out)) + } + } + { + url1 := "test" + url2 := "http://example.com" + c := Context{ + {IRI: IRI(url1)}, + {IRI: IRI(url2)}, + } + + out, err := c.MarshalJSON() + if err != nil { + t.Errorf("%s", err) + } + if !strings.Contains(string(out), url1) { + t.Errorf("Json doesn't contain %s, %s", url1, string(out)) + } + if !strings.Contains(string(out), url2) { + t.Errorf("Json doesn't contain %s, %s", url1, string(out)) + } + } +} diff --git a/decode.go b/decode.go new file mode 100644 index 0000000..e0a2564 --- /dev/null +++ b/decode.go @@ -0,0 +1,1253 @@ +package jsonld + +import ( + "bytes" + "encoding" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "reflect" + "runtime" + "strconv" + "unicode" + "unicode/utf16" + "unicode/utf8" +) + +// Unmarshal parses the JSON-encoded data and stores the result +// in the value pointed to by v. If v is nil or not a pointer, +// Unmarshal returns an InvalidUnmarshalError. +// +// Unmarshal uses the inverse of the encodings that +// Marshal uses, allocating maps, slices, and pointers as necessary, +// with the following additional rules: +// +// To unmarshal JSON into a pointer, Unmarshal first handles the case of +// the JSON being the JSON literal null. In that case, Unmarshal sets +// the pointer to nil. Otherwise, Unmarshal unmarshals the JSON into +// the value pointed at by the pointer. If the pointer is nil, Unmarshal +// allocates a new value for it to point to. +// +// To unmarshal JSON into a value implementing the Unmarshaler interface, +// Unmarshal calls that value's UnmarshalJSON method, including +// when the input is a JSON null. +// Otherwise, if the value implements encoding.TextUnmarshaler +// and the input is a JSON quoted string, Unmarshal calls that value's +// UnmarshalText method with the unquoted form of the string. +// +// To unmarshal JSON into a struct, Unmarshal matches incoming object +// keys to the keys used by Marshal (either the struct field name or its tag), +// preferring an exact match but also accepting a case-insensitive match. +// Unmarshal will only set exported fields of the struct. +// +// To unmarshal JSON into an interface value, +// Unmarshal stores one of these in the interface value: +// +// bool, for JSON booleans +// float64, for JSON numbers +// string, for JSON strings +// []interface{}, for JSON arrays +// map[string]interface{}, for JSON objects +// nil for JSON null +// +// To unmarshal a JSON array into a slice, Unmarshal resets the slice length +// to zero and then appends each element to the slice. +// As a special case, to unmarshal an empty JSON array into a slice, +// Unmarshal replaces the slice with a new empty slice. +// +// To unmarshal a JSON array into a Go array, Unmarshal decodes +// JSON array elements into corresponding Go array elements. +// If the Go array is smaller than the JSON array, +// the additional JSON array elements are discarded. +// If the JSON array is smaller than the Go array, +// the additional Go array elements are set to zero values. +// +// To unmarshal a JSON object into a map, Unmarshal first establishes a map to +// use. If the map is nil, Unmarshal allocates a new map. Otherwise Unmarshal +// reuses the existing map, keeping existing entries. Unmarshal then stores +// key-value pairs from the JSON object into the map. The map's key type must +// either be a string, an integer, or implement encoding.TextUnmarshaler. +// +// If a JSON value is not appropriate for a given target type, +// or if a JSON number overflows the target type, Unmarshal +// skips that field and completes the unmarshaling as best it can. +// If no more serious errors are encountered, Unmarshal returns +// an UnmarshalTypeError describing the earliest such error. In any +// case, it's not guaranteed that all the remaining fields following +// the problematic one will be unmarshaled into the target object. +// +// The JSON null value unmarshals into an interface, map, pointer, or slice +// by setting that Go value to nil. Because null is often used in JSON to mean +// ``not present,'' unmarshaling a JSON null into any other Go type has no effect +// on the value and produces no error. +// +// When unmarshaling quoted strings, invalid UTF-8 or +// invalid UTF-16 surrogate pairs are not treated as an error. +// Instead, they are replaced by the Unicode replacement +// character U+FFFD. +// +func Unmarshal(data []byte, v interface{}) error { + var d decodeState + + err := checkValid(data, &d.scan) + if err != nil { + return err + } + + d.init(data) + return d.unmarshal(&v) +} + +// An UnmarshalTypeError describes a JSON value that was +// not appropriate for a value of a specific Go type. +type UnmarshalTypeError struct { + Value string // description of JSON value - "bool", "array", "number -5" + Type reflect.Type // type of Go value it could not be assigned to + Offset int64 // error occurred after reading Offset bytes + Struct string // name of the struct type containing the field + Field string // name of the field holding the Go value +} + +func (e *UnmarshalTypeError) Error() string { + if e.Struct != "" || e.Field != "" { + return tagLabel + ": cannot unmarshal " + e.Value + " into Go struct field " + e.Struct + "." + e.Field + " of type " + e.Type.String() + } + return tagLabel + ": cannot unmarshal " + e.Value + " into Go value of type " + e.Type.String() +} + +// An UnmarshalFieldError describes a JSON object key that +// led to an unexported (and therefore unwritable) struct field. +// (No longer used; kept for compatibility.) +type UnmarshalFieldError struct { + Key string + Type reflect.Type + Field reflect.StructField +} + +func (e *UnmarshalFieldError) Error() string { + return tagLabel + ": cannot unmarshal object key " + strconv.Quote(e.Key) + " into unexported field " + e.Field.Name + " of type " + e.Type.String() +} + +// An InvalidUnmarshalError describes an invalid argument passed to Unmarshal. +// (The argument to Unmarshal must be a non-nil pointer.) +type InvalidUnmarshalError struct { + Type reflect.Type +} + +func (e *InvalidUnmarshalError) Error() string { + if e.Type == nil { + return tagLabel + ": Unmarshal(nil)" + } + + if e.Type.Kind() != reflect.Ptr { + return tagLabel + ": Unmarshal(non-pointer " + e.Type.String() + ")" + } + return tagLabel + ": Unmarshal(nil " + e.Type.String() + ")" +} + +func (d *decodeState) unmarshal(v interface{}) (err error) { + defer func() { + if r := recover(); r != nil { + if _, ok := r.(runtime.Error); ok { + panic(r) + } + err = r.(error) + } + }() + + rv := reflect.ValueOf(v) + if rv.Kind() != reflect.Ptr || rv.IsNil() { + return &InvalidUnmarshalError{reflect.TypeOf(v)} + } + + d.scan.reset() + // We decode rv not rv.Elem because the Unmarshaler interface + // test must be applied at the top level of the value. + d.value(rv) + return d.savedError +} + +// A Number represents a JSON number literal. +type Number string + +// String returns the literal text of the number. +func (n Number) String() string { return string(n) } + +// Float64 returns the number as a float64. +func (n Number) Float64() (float64, error) { + return strconv.ParseFloat(string(n), 64) +} + +// Int64 returns the number as an int64. +func (n Number) Int64() (int64, error) { + return strconv.ParseInt(string(n), 10, 64) +} + +// isValidNumber reports whether s is a valid JSON number literal. +func isValidNumber(s string) bool { + // This function implements the JSON numbers grammar. + // See https://tools.ietf.org/html/rfc7159#section-6 + // and http://json.org/number.gif + + if s == "" { + return false + } + + // Optional - + if s[0] == '-' { + s = s[1:] + if s == "" { + return false + } + } + + // Digits + switch { + default: + return false + + case s[0] == '0': + s = s[1:] + + case '1' <= s[0] && s[0] <= '9': + s = s[1:] + for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { + s = s[1:] + } + } + + // . followed by 1 or more digits. + if len(s) >= 2 && s[0] == '.' && '0' <= s[1] && s[1] <= '9' { + s = s[2:] + for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { + s = s[1:] + } + } + + // e or E followed by an optional - or + and + // 1 or more digits. + if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') { + s = s[1:] + if s[0] == '+' || s[0] == '-' { + s = s[1:] + if s == "" { + return false + } + } + for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { + s = s[1:] + } + } + + // Make sure we are at the end. + return s == "" +} + +// decodeState represents the state while decoding a JSON value. +type decodeState struct { + data []byte + off int // read offset in data + scan scanner + nextscan scanner // for calls to nextValue + errorContext struct { // provides context for type errors + Struct string + Field string + } + savedError error + useNumber bool +} + +// errPhase is used for errors that should not happen unless +// there is a bug in the JSON decoder or something is editing +// the data slice while the decoder executes. +var errPhase = errors.New(tagLabel + " decoder out of sync - data changing underfoot?") + +func (d *decodeState) init(data []byte) *decodeState { + d.data = data + d.off = 0 + d.savedError = nil + d.errorContext.Struct = "" + d.errorContext.Field = "" + return d +} + +// error aborts the decoding by panicking with err. +func (d *decodeState) error(err error) { + panic(d.addErrorContext(err)) +} + +// saveError saves the first err it is called with, +// for reporting at the end of the unmarshal. +func (d *decodeState) saveError(err error) { + if d.savedError == nil { + d.savedError = d.addErrorContext(err) + } +} + +// addErrorContext returns a new error enhanced with information from d.errorContext +func (d *decodeState) addErrorContext(err error) error { + if d.errorContext.Struct != "" || d.errorContext.Field != "" { + switch err := err.(type) { + case *UnmarshalTypeError: + err.Struct = d.errorContext.Struct + err.Field = d.errorContext.Field + return err + } + } + return err +} + +// next cuts off and returns the next full JSON value in d.data[d.off:]. +// The next value is known to be an object or array, not a literal. +func (d *decodeState) next() []byte { + c := d.data[d.off] + item, rest, err := nextValue(d.data[d.off:], &d.nextscan) + if err != nil { + d.error(err) + } + d.off = len(d.data) - len(rest) + + // Our scanner has seen the opening brace/bracket + // and thinks we're still in the middle of the object. + // invent a closing brace/bracket to get it out. + if c == '{' { + d.scan.step(&d.scan, '}') + } else { + d.scan.step(&d.scan, ']') + } + + return item +} + +// scanWhile processes bytes in d.data[d.off:] until it +// receives a scan code not equal to op. +// It updates d.off and returns the new scan code. +func (d *decodeState) scanWhile(op int) int { + var newOp int + for { + if d.off >= len(d.data) { + newOp = d.scan.eof() + d.off = len(d.data) + 1 // mark processed EOF with len+1 + } else { + c := d.data[d.off] + d.off++ + newOp = d.scan.step(&d.scan, c) + } + if newOp != op { + break + } + } + return newOp +} + +// value decodes a JSON value from d.data[d.off:] into the value. +// it updates d.off to point past the decoded value. +func (d *decodeState) value(v reflect.Value) { + if !v.IsValid() { + _, rest, err := nextValue(d.data[d.off:], &d.nextscan) + if err != nil { + d.error(err) + } + d.off = len(d.data) - len(rest) + + // d.scan thinks we're still at the beginning of the item. + // Feed in an empty string - the shortest, simplest value - + // so that it knows we got to the end of the value. + if d.scan.redo { + // rewind. + d.scan.redo = false + d.scan.step = stateBeginValue + } + d.scan.step(&d.scan, '"') + d.scan.step(&d.scan, '"') + + n := len(d.scan.parseState) + if n > 0 && d.scan.parseState[n-1] == parseObjectKey { + // d.scan thinks we just read an object key; finish the object + d.scan.step(&d.scan, ':') + d.scan.step(&d.scan, '"') + d.scan.step(&d.scan, '"') + d.scan.step(&d.scan, '}') + } + + return + } + + switch op := d.scanWhile(scanSkipSpace); op { + default: + d.error(errPhase) + + case scanBeginArray: + d.array(v) + + case scanBeginObject: + d.object(v) + + case scanBeginLiteral: + d.literal(v) + } +} + +type unquotedValue struct{} + +// valueQuoted is like value but decodes a +// quoted string literal or literal null into an interface value. +// If it finds anything other than a quoted string literal or null, +// valueQuoted returns unquotedValue{}. +func (d *decodeState) valueQuoted() interface{} { + switch op := d.scanWhile(scanSkipSpace); op { + default: + d.error(errPhase) + + case scanBeginArray: + d.array(reflect.Value{}) + + case scanBeginObject: + d.object(reflect.Value{}) + + case scanBeginLiteral: + switch v := d.literalInterface().(type) { + case nil, string: + return v + } + } + return unquotedValue{} +} + +// indirect walks down v allocating pointers as needed, +// until it gets to a non-pointer. +// if it encounters an Unmarshaler, indirect stops and returns that. +// if decodingNull is true, indirect stops at the last pointer so it can be set to nil. +func (d *decodeState) indirect(v reflect.Value, decodingNull bool) (json.Unmarshaler, encoding.TextUnmarshaler, reflect.Value) { + // If v is a named type and is addressable, + // start with its address, so that if the type has pointer methods, + // we find them. + if v.Kind() != reflect.Ptr && v.Type().Name() != "" && v.CanAddr() { + v = v.Addr() + } + for { + // Load value from interface, but only if the result will be + // usefully addressable. + if v.Kind() == reflect.Interface && !v.IsNil() { + e := v.Elem() + if e.Kind() == reflect.Ptr && !e.IsNil() && (!decodingNull || e.Elem().Kind() == reflect.Ptr) { + v = e + continue + } + } + + if v.Kind() != reflect.Ptr { + break + } + + if v.Elem().Kind() != reflect.Ptr && decodingNull && v.CanSet() { + break + } + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + if v.Type().NumMethod() > 0 { + if u, ok := v.Interface().(json.Unmarshaler); ok { + return u, nil, reflect.Value{} + } + if !decodingNull { + if u, ok := v.Interface().(encoding.TextUnmarshaler); ok { + return nil, u, reflect.Value{} + } + } + } + v = v.Elem() + } + return nil, nil, v +} + +// array consumes an array from d.data[d.off-1:], decoding into the value v. +// the first byte of the array ('[') has been read already. +func (d *decodeState) array(v reflect.Value) { + // Check for unmarshaler. + u, ut, pv := d.indirect(v, false) + if u != nil { + d.off-- + err := u.UnmarshalJSON(d.next()) + if err != nil { + d.error(err) + } + return + } + if ut != nil { + d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) + d.off-- + d.next() + return + } + + v = pv + + // Check type of target. + switch v.Kind() { + case reflect.Interface: + if v.NumMethod() == 0 { + // Decoding into nil interface? Switch to non-reflect code. + v.Set(reflect.ValueOf(d.arrayInterface())) + return + } + // Otherwise it's invalid. + fallthrough + default: + d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) + d.off-- + d.next() + return + case reflect.Array: + case reflect.Slice: + break + } + + i := 0 + for { + // Look ahead for ] - can only happen on first iteration. + op := d.scanWhile(scanSkipSpace) + if op == scanEndArray { + break + } + + // Back up so d.value can have the byte we just read. + d.off-- + d.scan.undo(op) + + // Get element of array, growing if necessary. + if v.Kind() == reflect.Slice { + // Grow slice if necessary + if i >= v.Cap() { + newcap := v.Cap() + v.Cap()/2 + if newcap < 4 { + newcap = 4 + } + newv := reflect.MakeSlice(v.Type(), v.Len(), newcap) + reflect.Copy(newv, v) + v.Set(newv) + } + if i >= v.Len() { + v.SetLen(i + 1) + } + } + + if i < v.Len() { + // Decode into element. + d.value(v.Index(i)) + } else { + // Ran out of fixed array: skip. + d.value(reflect.Value{}) + } + i++ + + // Next token must be , or ]. + op = d.scanWhile(scanSkipSpace) + if op == scanEndArray { + break + } + if op != scanArrayValue { + d.error(errPhase) + } + } + + if i < v.Len() { + if v.Kind() == reflect.Array { + // Array. Zero the rest. + z := reflect.Zero(v.Type().Elem()) + for ; i < v.Len(); i++ { + v.Index(i).Set(z) + } + } else { + v.SetLen(i) + } + } + if i == 0 && v.Kind() == reflect.Slice { + v.Set(reflect.MakeSlice(v.Type(), 0, 0)) + } +} + +var nullLiteral = []byte("null") +var textUnmarshalerType = reflect.TypeOf(new(encoding.TextUnmarshaler)).Elem() + +// object consumes an object from d.data[d.off-1:], decoding into the value v. +// the first byte ('{') of the object has been read already. +func (d *decodeState) object(v reflect.Value) { + // Check for unmarshaler. + u, ut, pv := d.indirect(v, false) + if u != nil { + d.off-- + err := u.UnmarshalJSON(d.next()) + if err != nil { + d.error(err) + } + return + } + if ut != nil { + d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)}) + d.off-- + d.next() // skip over { } in input + return + } + v = pv + + // Decoding into nil interface? Switch to non-reflect code. + if v.Kind() == reflect.Interface && v.NumMethod() == 0 { + v.Set(reflect.ValueOf(d.objectInterface())) + return + } + + // Check type of target: + // struct or + // map[T1]T2 where T1 is string, an integer type, + // or an encoding.TextUnmarshaler + switch v.Kind() { + case reflect.Map: + // Map key must either have string kind, have an integer kind, + // or be an encoding.TextUnmarshaler. + t := v.Type() + switch t.Key().Kind() { + case reflect.String, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + default: + if !reflect.PtrTo(t.Key()).Implements(textUnmarshalerType) { + d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)}) + d.off-- + d.next() // skip over { } in input + return + } + } + if v.IsNil() { + v.Set(reflect.MakeMap(t)) + } + case reflect.Struct: + // ok + default: + d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)}) + d.off-- + d.next() // skip over { } in input + return + } + + var mapElem reflect.Value + + for { + // Read opening " of string key or closing }. + op := d.scanWhile(scanSkipSpace) + if op == scanEndObject { + // closing } - can only happen on first iteration. + break + } + if op != scanBeginLiteral { + d.error(errPhase) + } + + // Read key. + start := d.off - 1 + op = d.scanWhile(scanContinue) + item := d.data[start : d.off-1] + key, ok := unquoteBytes(item) + if !ok { + d.error(errPhase) + } + + // Figure out field corresponding to key. + var subv reflect.Value + destring := false // whether the value is wrapped in a string to be decoded first + + if v.Kind() == reflect.Map { + elemType := v.Type().Elem() + if !mapElem.IsValid() { + mapElem = reflect.New(elemType).Elem() + } else { + mapElem.Set(reflect.Zero(elemType)) + } + subv = mapElem + } else { + var f *field + fields := cachedTypeFields(v.Type()) + for i := range fields { + ff := &fields[i] + if bytes.Equal(ff.nameBytes, key) { + f = ff + break + } + if f == nil && ff.equalFold(ff.nameBytes, key) { + f = ff + } + } + if f != nil { + subv = v + destring = f.quoted + for _, i := range f.index { + if subv.Kind() == reflect.Ptr { + if subv.IsNil() { + subv.Set(reflect.New(subv.Type().Elem())) + } + subv = subv.Elem() + } + subv = subv.Field(i) + } + d.errorContext.Field = f.name + d.errorContext.Struct = v.Type().Name() + } + } + + // Read : before value. + if op == scanSkipSpace { + op = d.scanWhile(scanSkipSpace) + } + if op != scanObjectKey { + d.error(errPhase) + } + + if destring { + switch qv := d.valueQuoted().(type) { + case nil: + d.literalStore(nullLiteral, subv, false) + case string: + d.literalStore([]byte(qv), subv, true) + default: + d.saveError(fmt.Errorf(tagLabel+": invalid use of ,string struct tag, trying to unmarshal unquoted value into %v", subv.Type())) + } + } else { + d.value(subv) + } + + // Write value back to map; + // if using struct, subv points into struct already. + if v.Kind() == reflect.Map { + kt := v.Type().Key() + var kv reflect.Value + switch { + case kt.Kind() == reflect.String: + kv = reflect.ValueOf(key).Convert(kt) + case reflect.PtrTo(kt).Implements(textUnmarshalerType): + kv = reflect.New(v.Type().Key()) + d.literalStore(item, kv, true) + kv = kv.Elem() + default: + switch kt.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + s := string(key) + n, err := strconv.ParseInt(s, 10, 64) + if err != nil || reflect.Zero(kt).OverflowInt(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)}) + return + } + kv = reflect.ValueOf(n).Convert(kt) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + s := string(key) + n, err := strconv.ParseUint(s, 10, 64) + if err != nil || reflect.Zero(kt).OverflowUint(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)}) + return + } + kv = reflect.ValueOf(n).Convert(kt) + default: + panic(tagLabel + ": Unexpected key type") // should never occur + } + } + v.SetMapIndex(kv, subv) + } + + // Next token must be , or }. + op = d.scanWhile(scanSkipSpace) + if op == scanEndObject { + break + } + if op != scanObjectValue { + d.error(errPhase) + } + + d.errorContext.Struct = "" + d.errorContext.Field = "" + } +} + +// literal consumes a literal from d.data[d.off-1:], decoding into the value v. +// The first byte of the literal has been read already +// (that's how the caller knows it's a literal). +func (d *decodeState) literal(v reflect.Value) { + // All bytes inside literal return scanContinue op code. + start := d.off - 1 + op := d.scanWhile(scanContinue) + + // Scan read one byte too far; back up. + d.off-- + d.scan.undo(op) + + d.literalStore(d.data[start:d.off], v, false) +} + +// convertNumber converts the number literal s to a float64 or a Number +// depending on the setting of d.useNumber. +func (d *decodeState) convertNumber(s string) (interface{}, error) { + if d.useNumber { + return Number(s), nil + } + f, err := strconv.ParseFloat(s, 64) + if err != nil { + return nil, &UnmarshalTypeError{Value: "number " + s, Type: reflect.TypeOf(0.0), Offset: int64(d.off)} + } + return f, nil +} + +var numberType = reflect.TypeOf(Number("")) + +// literalStore decodes a literal stored in item into v. +// +// fromQuoted indicates whether this literal came from unwrapping a +// string from the ",string" struct tag option. this is used only to +// produce more helpful error messages. +func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool) { + // Check for unmarshaler. + if len(item) == 0 { + //Empty string given + d.saveError(fmt.Errorf(tagLabel+": invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + return + } + isNull := item[0] == 'n' // null + u, ut, pv := d.indirect(v, isNull) + if u != nil { + err := u.UnmarshalJSON(item) + if err != nil { + d.error(err) + } + return + } + if ut != nil { + if item[0] != '"' { + if fromQuoted { + d.saveError(fmt.Errorf(tagLabel+": invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + } else { + var val string + switch item[0] { + case 'n': + val = "null" + case 't', 'f': + val = "bool" + default: + val = "number" + } + d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.off)}) + } + return + } + s, ok := unquoteBytes(item) + if !ok { + if fromQuoted { + d.error(fmt.Errorf(tagLabel+": invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + } else { + d.error(errPhase) + } + } + err := ut.UnmarshalText(s) + if err != nil { + d.error(err) + } + return + } + + v = pv + + switch c := item[0]; c { + case 'n': // null + // The main parser checks that only true and false can reach here, + // but if this was a quoted string input, it could be anything. + if fromQuoted && string(item) != "null" { + d.saveError(fmt.Errorf(tagLabel+": invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + break + } + switch v.Kind() { + case reflect.Interface, reflect.Ptr, reflect.Map, reflect.Slice: + v.Set(reflect.Zero(v.Type())) + // otherwise, ignore null for primitives/string + } + case 't', 'f': // true, false + value := item[0] == 't' + // The main parser checks that only true and false can reach here, + // but if this was a quoted string input, it could be anything. + if fromQuoted && string(item) != "true" && string(item) != "false" { + d.saveError(fmt.Errorf(tagLabel+": invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + break + } + switch v.Kind() { + default: + if fromQuoted { + d.saveError(fmt.Errorf(tagLabel+": invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + } else { + d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.off)}) + } + case reflect.Bool: + v.SetBool(value) + case reflect.Interface: + if v.NumMethod() == 0 { + v.Set(reflect.ValueOf(value)) + } else { + d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.off)}) + } + } + + case '"': // string + s, ok := unquoteBytes(item) + if !ok { + if fromQuoted { + d.error(fmt.Errorf(tagLabel+": invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + } else { + d.error(errPhase) + } + } + switch v.Kind() { + default: + d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.off)}) + case reflect.Slice: + if v.Type().Elem().Kind() != reflect.Uint8 { + d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.off)}) + break + } + b := make([]byte, base64.StdEncoding.DecodedLen(len(s))) + n, err := base64.StdEncoding.Decode(b, s) + if err != nil { + d.saveError(err) + break + } + v.SetBytes(b[:n]) + case reflect.String: + v.SetString(string(s)) + case reflect.Interface: + if v.NumMethod() == 0 { + v.Set(reflect.ValueOf(string(s))) + } else { + d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.off)}) + } + } + + default: // number + if c != '-' && (c < '0' || c > '9') { + if fromQuoted { + d.error(fmt.Errorf(tagLabel+": invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + } else { + d.error(errPhase) + } + } + s := string(item) + switch v.Kind() { + default: + if v.Kind() == reflect.String && v.Type() == numberType { + v.SetString(s) + if !isValidNumber(s) { + d.error(fmt.Errorf(tagLabel+": invalid number literal, trying to unmarshal %q into Number", item)) + } + break + } + if fromQuoted { + d.error(fmt.Errorf(tagLabel+": invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + } else { + d.error(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.off)}) + } + case reflect.Interface: + n, err := d.convertNumber(s) + if err != nil { + d.saveError(err) + break + } + if v.NumMethod() != 0 { + d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.off)}) + break + } + v.Set(reflect.ValueOf(n)) + + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + n, err := strconv.ParseInt(s, 10, 64) + if err != nil || v.OverflowInt(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.off)}) + break + } + v.SetInt(n) + + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + n, err := strconv.ParseUint(s, 10, 64) + if err != nil || v.OverflowUint(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.off)}) + break + } + v.SetUint(n) + + case reflect.Float32, reflect.Float64: + n, err := strconv.ParseFloat(s, v.Type().Bits()) + if err != nil || v.OverflowFloat(n) { + d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.off)}) + break + } + v.SetFloat(n) + } + } +} + +// The xxxInterface routines build up a value to be stored +// in an empty interface. They are not strictly necessary, +// but they avoid the weight of reflection in this common case. + +// valueInterface is like value but returns interface{} +func (d *decodeState) valueInterface() interface{} { + switch d.scanWhile(scanSkipSpace) { + default: + d.error(errPhase) + panic("unreachable") + case scanBeginArray: + return d.arrayInterface() + case scanBeginObject: + return d.objectInterface() + case scanBeginLiteral: + return d.literalInterface() + } +} + +// arrayInterface is like array but returns []interface{}. +func (d *decodeState) arrayInterface() []interface{} { + var v = make([]interface{}, 0) + for { + // Look ahead for ] - can only happen on first iteration. + op := d.scanWhile(scanSkipSpace) + if op == scanEndArray { + break + } + + // Back up so d.value can have the byte we just read. + d.off-- + d.scan.undo(op) + + v = append(v, d.valueInterface()) + + // Next token must be , or ]. + op = d.scanWhile(scanSkipSpace) + if op == scanEndArray { + break + } + if op != scanArrayValue { + d.error(errPhase) + } + } + return v +} + +// objectInterface is like object but returns map[string]interface{}. +func (d *decodeState) objectInterface() map[string]interface{} { + m := make(map[string]interface{}) + for { + // Read opening " of string key or closing }. + op := d.scanWhile(scanSkipSpace) + if op == scanEndObject { + // closing } - can only happen on first iteration. + break + } + if op != scanBeginLiteral { + d.error(errPhase) + } + + // Read string key. + start := d.off - 1 + op = d.scanWhile(scanContinue) + item := d.data[start : d.off-1] + key, ok := unquote(item) + if !ok { + d.error(errPhase) + } + + // Read : before value. + if op == scanSkipSpace { + op = d.scanWhile(scanSkipSpace) + } + if op != scanObjectKey { + d.error(errPhase) + } + + // Read value. + m[key] = d.valueInterface() + + // Next token must be , or }. + op = d.scanWhile(scanSkipSpace) + if op == scanEndObject { + break + } + if op != scanObjectValue { + d.error(errPhase) + } + } + return m +} + +// literalInterface is like literal but returns an interface value. +func (d *decodeState) literalInterface() interface{} { + // All bytes inside literal return scanContinue op code. + start := d.off - 1 + op := d.scanWhile(scanContinue) + + // Scan read one byte too far; back up. + d.off-- + d.scan.undo(op) + item := d.data[start:d.off] + + switch c := item[0]; c { + case 'n': // null + return nil + + case 't', 'f': // true, false + return c == 't' + + case '"': // string + s, ok := unquote(item) + if !ok { + d.error(errPhase) + } + return s + + default: // number + if c != '-' && (c < '0' || c > '9') { + d.error(errPhase) + } + n, err := d.convertNumber(string(item)) + if err != nil { + d.saveError(err) + } + return n + } +} + +// getu4 decodes \uXXXX from the beginning of s, returning the hex value, +// or it returns -1. +func getu4(s []byte) rune { + if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { + return -1 + } + r, err := strconv.ParseUint(string(s[2:6]), 16, 64) + if err != nil { + return -1 + } + return rune(r) +} + +// unquote converts a quoted JSON string literal s into an actual string t. +// The rules are different than for Go, so cannot use strconv.Unquote. +func unquote(s []byte) (t string, ok bool) { + s, ok = unquoteBytes(s) + t = string(s) + return +} + +func unquoteBytes(s []byte) (t []byte, ok bool) { + if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { + return + } + s = s[1 : len(s)-1] + + // Check for unusual characters. If there are none, + // then no unquoting is needed, so return a slice of the + // original bytes. + r := 0 + for r < len(s) { + c := s[r] + if c == '\\' || c == '"' || c < ' ' { + break + } + if c < utf8.RuneSelf { + r++ + continue + } + rr, size := utf8.DecodeRune(s[r:]) + if rr == utf8.RuneError && size == 1 { + break + } + r += size + } + if r == len(s) { + return s, true + } + + b := make([]byte, len(s)+2*utf8.UTFMax) + w := copy(b, s[0:r]) + for r < len(s) { + // Out of room? Can only happen if s is full of + // malformed UTF-8 and we're replacing each + // byte with RuneError. + if w >= len(b)-2*utf8.UTFMax { + nb := make([]byte, (len(b)+utf8.UTFMax)*2) + copy(nb, b[0:w]) + b = nb + } + switch c := s[r]; { + case c == '\\': + r++ + if r >= len(s) { + return + } + switch s[r] { + default: + return + case '"', '\\', '/', '\'': + b[w] = s[r] + r++ + w++ + case 'b': + b[w] = '\b' + r++ + w++ + case 'f': + b[w] = '\f' + r++ + w++ + case 'n': + b[w] = '\n' + r++ + w++ + case 'r': + b[w] = '\r' + r++ + w++ + case 't': + b[w] = '\t' + r++ + w++ + case 'u': + r-- + rr := getu4(s[r:]) + if rr < 0 { + return + } + r += 6 + if utf16.IsSurrogate(rr) { + rr1 := getu4(s[r:]) + if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { + // A valid pair; consume. + r += 6 + w += utf8.EncodeRune(b[w:], dec) + break + } + // Invalid surrogate; fall back to replacement rune. + rr = unicode.ReplacementChar + } + w += utf8.EncodeRune(b[w:], rr) + } + + // Quote, control characters are invalid. + case c == '"', c < ' ': + return + + // ASCII + case c < utf8.RuneSelf: + b[w] = c + r++ + w++ + + // Coerce to well-formed UTF-8. + default: + rr, size := utf8.DecodeRune(s[r:]) + r += size + w += utf8.EncodeRune(b[w:], rr) + } + } + return b[0:w], true +} diff --git a/decode_test.go b/decode_test.go new file mode 100644 index 0000000..c38ca35 --- /dev/null +++ b/decode_test.go @@ -0,0 +1,149 @@ +package jsonld + +import ( + "strconv" + "testing" +) + +func TestUnmarshalWithEmptyJsonObject(t *testing.T) { + obj := mockTypeA{} + err := Unmarshal([]byte("{}"), &obj) + if err != nil { + t.Error(err) + } + if obj.Id != "" { + t.Errorf("Id should have been an empty string, found %s", obj.Id) + } + if obj.Name != "" { + t.Errorf("Name should have been an empty string, found %s", obj.Name) + } + if obj.Type != "" { + t.Errorf("Type should have been an empty string, found %s", obj.Type) + } + if obj.PropA != "" { + t.Errorf("PropA should have been an empty string, found %s", obj.PropA) + } + if obj.PropB != 0 { + t.Errorf("PropB should have been 0.0, found %f", obj.PropB) + } +} + +type mockWithContext struct { + mockTypeA + Context Context `jsonld:"@context"` +} + +func TestUnmarshalWithEmptyJsonObjectWithStringContext(t *testing.T) { + obj := mockWithContext{} + url := "http://www.habarnam.ro" + data := []byte(`{"@context": "` + url + `" }`) + err := Unmarshal(data, &obj) + if err != nil { + t.Error(err) + } + if obj.Id != "" { + t.Errorf("Id should have been an empty string, found %s", obj.Id) + } + if obj.Name != "" { + t.Errorf("Name should have been an empty string, found %s", obj.Name) + } + if obj.Type != "" { + t.Errorf("Type should have been an empty string, found %s", obj.Type) + } + if obj.PropA != "" { + t.Errorf("PropA should have been an empty string, found %s", obj.PropA) + } + if obj.PropB != 0 { + t.Errorf("PropB should have been 0.0, found %f", obj.PropB) + } +} + +func TestUnmarshalWithEmptyJsonObjectWithObjectContext(t *testing.T) { + obj := mockWithContext{} + url := "http://www.habarnam.ro" + data := []byte(`{"@context": { "@url": "` + url + `"} }`) + err := Unmarshal(data, &obj) + if err != nil { + t.Error(err) + } + if obj.Id != "" { + t.Errorf("Id should have been an empty string, found %s", obj.Id) + } + if obj.Name != "" { + t.Errorf("Name should have been an empty string, found %s", obj.Name) + } + if obj.Type != "" { + t.Errorf("Type should have been an empty string, found %s", obj.Type) + } + if obj.PropA != "" { + t.Errorf("PropA should have been an empty string, found %s", obj.PropA) + } + if obj.PropB != 0 { + t.Errorf("PropB should have been 0.0, found %f", obj.PropB) + } +} + +func TestUnmarshalWithEmptyJsonObjectWithOneLanguageContext(t *testing.T) { + obj := mockWithContext{} + url := "http://www.habarnam.ro" + langEn := "en-US" + + data := []byte(`{"@context": { "@url": "` + url + `", "@language": "` + langEn + `"} }`) + err := Unmarshal(data, &obj) + if err != nil { + t.Error(err) + } + if obj.Id != "" { + t.Errorf("Id should have been an empty string, found %s", obj.Id) + } + if obj.Name != "" { + t.Errorf("Name should have been an empty string, found %s", obj.Name) + } + if obj.Type != "" { + t.Errorf("Type should have been an empty string, found %s", obj.Type) + } + if obj.PropA != "" { + t.Errorf("PropA should have been an empty string, found %s", obj.PropA) + } + if obj.PropB != 0 { + t.Errorf("PropB should have been 0.0, found %f", obj.PropB) + } +} +func TestUnmarshalWithEmptyJsonObjectWithFullObject(t *testing.T) { + obj := mockWithContext{} + url := "http://www.habarnam.ro" + langEn := "en-US" + propA := "ana" + var propB float32 = 6.66 + typ := "test" + name := "test object #1" + id := "777sdad" + + data := []byte(`{ + "@context": { "@url": "` + url + `", "@language": "` + langEn + `"}, + "PropA": "` + propA + `", + "PropB": ` + strconv.FormatFloat(float64(propB), 'f', 2, 32) + `, + "Id" : "` + id + `", + "Name" : "` + name + `", + "Type" : "` + typ + `" + }`) + err := Unmarshal(data, &obj) + if err != nil { + t.Error(err) + } + if obj.Id != id { + t.Errorf("Id should have been %q, found %q", id, obj.Id) + } + if obj.Name != name { + t.Errorf("Name should have been %q, found %q", name, obj.Name) + } + if obj.Type != typ { + t.Errorf("Type should have been %q, found %q", typ, obj.Type) + } + if obj.PropA != propA { + t.Errorf("PropA should have been %q, found %q", propA, obj.PropA) + } + if obj.PropB != propB { + t.Errorf("PropB should have been %f, found %f", propB, obj.PropB) + } +} diff --git a/encode.go b/encode.go new file mode 100644 index 0000000..bbb0b2c --- /dev/null +++ b/encode.go @@ -0,0 +1,1377 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package jsonld implements encoding and decoding of JSON as defined in +// RFC 4627. The mapping between JSON and Go values is described +// in the documentation for the Marshal and Unmarshal functions. +// +// See tagLabel + " and Go" for an introduction to this package: +// https://golang.org/doc/articles/json_and_go.html +package jsonld + +import ( + "bytes" + "encoding" + "encoding/base64" + "encoding/json" + "fmt" + "math" + "reflect" + "runtime" + "sort" + "strconv" + "strings" + "sync" + "sync/atomic" + "unicode" + "unicode/utf8" +) + +var Ctxt []Collapsible + +const ( + tagLabel = "jsonld" + tagOmitEmpty = "omitempty" + tagCollapsible = "collapsible" + tagNoMarshal = "nomarshal" +) + +type payloadWithContext struct { + Context []Collapsible `jsonld:"@context,omitempty,collapsible"` + ID interface{} `jsonld:"@id,omitempty,collapsible"` + Type interface{} `jsonld:"@type,omitempty,collapsible"` + Obj interface{} +} + +func (p payloadWithContext) Collapse() interface{} { + return p +} + +// WithContext +func WithContext(c ...Collapsible) payloadWithContext { + Ctxt = c + return payloadWithContext{ + Context: c, + } +} + +var payloadType = reflect.TypeOf(new(payloadWithContext)).Elem() + +// Marshal +func (p payloadWithContext) Marshal(v interface{}) ([]byte, error) { + p.Obj = v + return Marshal(p) +} + +// Tag used by structs from the ActivityPub package to Marshal and Unmarshal to/from JSON-LD +type Tag struct { + Name string + Ignore bool + OmitEmpty bool + Collapsible bool + NoMarshal bool +} + +// LoadTag used by structs from the ActivityPub package to Marshal and Unmarshal to/from JSON-LD +func LoadTag(tag reflect.StructTag) (Tag, bool) { + jlTag, ok := tag.Lookup(tagLabel) + if !ok { + return Tag{}, false + } + val := strings.Split(jlTag, ",") + cont := func(arr []string, s string) bool { + for _, v := range arr { + if v == s { + return true + } + } + return false + } + t := Tag{ + OmitEmpty: cont(val, tagOmitEmpty), + Collapsible: cont(val, tagCollapsible), + NoMarshal: cont(val, tagNoMarshal), + } + t.Name, t.Ignore = func(v string) (string, bool) { + if len(v) > 0 && v != "_" { + return v, false + } + return "", true + }(val[0]) + + return t, true +} + +// TagName used by structs from the ActivityPub package to Marshal and Unmarshal to/from JSON-LD +func TagName(n string, tag Tag) string { + if len(tag.Name) > 0 { + return tag.Name + } + return n +} + +// An UnsupportedTypeError is returned by Marshal when attempting +// to encode an unsupported value type. +type UnsupportedTypeError struct { + Type reflect.Type +} + +// Marshal returns the JSON encoding of v. +// +// Marshal traverses the value v recursively. +// If an encountered value implements the Marshaler interface +// and is not a nil pointer, Marshal calls its MarshalJSON method +// to produce JSON. If no MarshalJSON method is present but the +// value implements encoding.TextMarshaler instead, Marshal calls +// its MarshalText method and encodes the result as a JSON string. +// The nil pointer exception is not strictly necessary +// but mimics a similar, necessary exception in the behavior of +// UnmarshalJSON. +// +// Otherwise, Marshal uses the following type-dependent default encodings: +// +// Boolean values encode as JSON booleans. +// +// Floating point, integer, and Number values encode as JSON numbers. +// +// String values encode as JSON strings coerced to valid UTF-8, +// replacing invalid bytes with the Unicode replacement rune. +// The angle brackets "<" and ">" are escaped to "\u003c" and "\u003e" +// to keep some browsers from misinterpreting JSON output as HTML. +// Ampersand "&" is also escaped to "\u0026" for the same reason. +// This escaping can be disabled using an Encoder that had SetEscapeHTML(false) +// called on it. +// +// Array and slice values encode as JSON arrays, except that +// []byte encodes as a base64-encoded string, and a nil slice +// encodes as the null JSON value. +// +// Struct values encode as JSON objects. +// Each exported struct field becomes a member of the object, using the +// field name as the object key, unless the field is omitted for one of the +// reasons given below. +// +// The encoding of each struct field can be customized by the format string +// stored under the tagLabel + "" key in the struct field's tag. +// The format string gives the name of the field, possibly followed by a +// comma-separated list of options. The name may be empty in order to +// specify options without overriding the default field name. +// +// The "omitempty" option specifies that the field should be omitted +// from the encoding if the field has an empty value, defined as +// false, 0, a nil pointer, a nil interface value, and any empty array, +// slice, map, or string. +// +// As a special case, if the field tag is "-", the field is always omitted. +// Note that a field with name "-" can still be generated using the tag "-,". +// +// Examples of struct field tags and their meanings: +// +// // Field appears in JSON as key "myName". +// Field int `json:"myName"` +// +// // Field appears in JSON as key "myName" and +// // the field is omitted from the object if its value is empty, +// // as defined above. +// Field int `json:"myName,omitempty"` +// +// // Field appears in JSON as key "Field" (the default), but +// // the field is skipped if empty. +// // Note the leading comma. +// Field int `json:",omitempty"` +// +// // Field is ignored by this package. +// Field int `json:"-"` +// +// // Field appears in JSON as key "-". +// Field int `json:"-,"` +// +// The "string" option signals that a field is stored as JSON inside a +// JSON-encoded string. It applies only to fields of string, floating point, +// integer, or boolean types. This extra level of encoding is sometimes used +// when communicating with JavaScript programs: +// +// Int64String int64 `json:",string"` +// +// The key name will be used if it's a non-empty string consisting of +// only Unicode letters, digits, and ASCII punctuation except quotation +// marks, backslash, and comma. +// +// Anonymous struct fields are usually marshaled as if their inner exported fields +// were fields in the outer struct, subject to the usual Go visibility rules amended +// as described in the next paragraph. +// An anonymous struct field with a name given in its JSON tag is treated as +// having that name, rather than being anonymous. +// An anonymous struct field of interface type is treated the same as having +// that type as its name, rather than being anonymous. +// +// The Go visibility rules for struct fields are amended for JSON when +// deciding which field to marshal or unmarshal. If there are +// multiple fields at the same level, and that level is the least +// nested (and would therefore be the nesting level selected by the +// usual Go rules), the following extra rules apply: +// +// 1) Of those fields, if any are JSON-tagged, only tagged fields are considered, +// even if there are multiple untagged fields that would otherwise conflict. +// +// 2) If there is exactly one field (tagged or not according to the first rule), that is selected. +// +// 3) Otherwise there are multiple fields, and all are ignored; no error occurs. +// +// Handling of anonymous struct fields is new in Go 1.1. +// Prior to Go 1.1, anonymous struct fields were ignored. To force ignoring of +// an anonymous struct field in both current and earlier versions, give the field +// a JSON tag of "-". +// +// Map values encode as JSON objects. The map's key type must either be a +// string, an integer type, or implement encoding.TextMarshaler. The map keys +// are sorted and used as JSON object keys by applying the following rules, +// subject to the UTF-8 coercion described for string values above: +// - string keys are used directly +// - encoding.TextMarshalers are marshaled +// - integer keys are converted to strings +// +// Pointer values encode as the value pointed to. +// A nil pointer encodes as the null JSON value. +// +// Interface values encode as the value contained in the interface. +// A nil interface value encodes as the null JSON value. +// +// Channel, complex, and function values cannot be encoded in JSON. +// Attempting to encode such a value causes Marshal to return +// an UnsupportedTypeError. +// +// JSON cannot represent cyclic data structures and Marshal does not +// handle them. Passing cyclic structures to Marshal will result in +// an infinite recursion. + +func Marshal(v interface{}) ([]byte, error) { + e := &encodeState{} + + err := e.marshal(v, encOpts{escapeHTML: false}) + if err != nil { + return nil, err + } + output := e.Bytes() + if v == nil { + return output, nil + } + + repl := func(dat *[]byte) { + // @todo(marius): fix this ugly hack + o := *dat + if bytes.Contains(o, []byte(`"Obj":null`)) { + o = bytes.Replace(o, []byte(`,"Obj":null`), []byte(""), 1) + } else { + o = bytes.Replace(o, []byte(`,"Obj":{`), []byte(","), 1) + o = o[:len(o)-1] + } + *dat = o + } + + switch v.(type) { + case payloadWithContext: + repl(&output) + case *payloadWithContext: + repl(&output) + } + return output, nil +} + +func (e *UnsupportedTypeError) Error() string { + return tagLabel + ": unsupported type: " + e.Type.String() +} + +type UnsupportedValueError struct { + Value reflect.Value + Str string +} + +func (e *UnsupportedValueError) Error() string { + return tagLabel + ": unsupported value: " + e.Str +} + +// Before Go 1.2, an InvalidUTF8Error was returned by Marshal when +// attempting to encode a string value with invalid UTF-8 sequences. +// As of Go 1.2, Marshal instead coerces the string to valid UTF-8 by +// replacing invalid bytes with the Unicode replacement rune U+FFFD. +// This error is no longer generated but is kept for backwards compatibility +// with programs that might mention it. +type InvalidUTF8Error struct { + S string // the whole string value that caused the error +} + +func (e *InvalidUTF8Error) Error() string { + return tagLabel + ": invalid UTF-8 in string: " + strconv.Quote(e.S) +} + +type MarshalerError struct { + Type reflect.Type + Err error +} + +func (e *MarshalerError) Error() string { + return tagLabel + ": error calling MarshalJSON for type " + e.Type.String() + ": " + e.Err.Error() +} + +var hex = "0123456789abcdef" + +// An encodeState encodes JSON into a bytes.Buffer. +type encodeState struct { + bytes.Buffer // accumulated output + scratch [64]byte +} + +var encodeStatePool sync.Pool + +func newEncodeState() *encodeState { + if v := encodeStatePool.Get(); v != nil { + e := v.(*encodeState) + e.Reset() + return e + } + return new(encodeState) +} + +func (e *encodeState) marshal(v interface{}, opts encOpts) (err error) { + defer func() { + if r := recover(); r != nil { + if _, ok := r.(runtime.Error); ok { + panic(r) + } + if s, ok := r.(string); ok { + panic(s) + } + err = r.(error) + } + }() + e.reflectValue(reflect.ValueOf(v), opts) + return nil +} + +func (e *encodeState) error(err error) { + panic(err) +} + +func isEmptyValue(v reflect.Value) bool { + switch v.Kind() { + case reflect.Array, reflect.Map, reflect.Slice, reflect.String: + return v.Len() == 0 + case reflect.Bool: + return !v.Bool() + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return v.Int() == 0 + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return v.Uint() == 0 + case reflect.Float32, reflect.Float64: + return v.Float() == 0 + case reflect.Interface, reflect.Ptr: + return v.IsNil() + case reflect.Struct: + // this is important as it removes structs containing only empty elements + // to ensure that the jsonld message is not extra verbose with valueless properties + return func(reflect.Value) bool { + var ret bool = true + for i := 0; i < v.NumField(); i++ { + ret = ret && isEmptyValue(v.Field(i)) + } + return ret + }(v) + } + return false +} + +func (e *encodeState) reflectValue(v reflect.Value, opts encOpts) { + valueEncoder(v)(e, v, opts) +} + +type encOpts struct { + // quoted causes primitive fields to be encoded inside JSON strings. + quoted bool + // escapeHTML causes '<', '>', and '&' to be escaped in JSON strings. + escapeHTML bool +} + +type encoderFunc func(e *encodeState, v reflect.Value, opts encOpts) + +var encoderCache sync.Map // map[reflect.Type]encoderFunc + +func valueEncoder(v reflect.Value) encoderFunc { + if !v.IsValid() { + return invalidValueEncoder + } + return typeEncoder(v.Type()) +} + +func typeEncoder(t reflect.Type) encoderFunc { + if fi, ok := encoderCache.Load(t); ok { + return fi.(encoderFunc) + } + + // To deal with recursive types, populate the map with an + // indirect func before we build it. This type waits on the + // real func (f) to be ready and then calls it. This indirect + // func is only used for recursive types. + var ( + wg sync.WaitGroup + f encoderFunc + ) + wg.Add(1) + fi, loaded := encoderCache.LoadOrStore(t, encoderFunc(func(e *encodeState, v reflect.Value, opts encOpts) { + wg.Wait() + f(e, v, opts) + })) + if loaded { + return fi.(encoderFunc) + } + + // Compute the real encoder and replace the indirect func with it. + f = newTypeEncoder(t, true) + wg.Done() + encoderCache.Store(t, f) + return f +} + +var ( + marshalerType = reflect.TypeOf(new(json.Marshaler)).Elem() + textMarshalerType = reflect.TypeOf(new(encoding.TextMarshaler)).Elem() +) + +// newTypeEncoder constructs an encoderFunc for a type. +// The returned encoder only checks CanAddr when allowAddr is true. +func newTypeEncoder(t reflect.Type, allowAddr bool) encoderFunc { + if t.Implements(marshalerType) { + return marshalerEncoder + } + if t.Kind() != reflect.Ptr && allowAddr { + if reflect.PtrTo(t).Implements(marshalerType) { + return newCondAddrEncoder(addrMarshalerEncoder, newTypeEncoder(t, false)) + } + } + + if t.Implements(textMarshalerType) { + return textMarshalerEncoder + } + if t.Kind() != reflect.Ptr && allowAddr { + if reflect.PtrTo(t).Implements(textMarshalerType) { + return newCondAddrEncoder(addrTextMarshalerEncoder, newTypeEncoder(t, false)) + } + } + + switch t.Kind() { + case reflect.Bool: + return boolEncoder + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return intEncoder + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return uintEncoder + case reflect.Float32: + return float32Encoder + case reflect.Float64: + return float64Encoder + case reflect.String: + return stringEncoder + case reflect.Interface: + return interfaceEncoder + case reflect.Struct: + return newStructEncoder(t) + case reflect.Map: + return newMapEncoder(t) + case reflect.Slice: + return newSliceEncoder(t) + case reflect.Array: + return newArrayEncoder(t) + case reflect.Ptr: + return newPtrEncoder(t) + default: + return unsupportedTypeEncoder + } +} + +func invalidValueEncoder(e *encodeState, v reflect.Value, _ encOpts) { + e.WriteString("null") +} + +func marshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { + if v.Kind() == reflect.Ptr && v.IsNil() { + e.WriteString("null") + return + } + m, ok := v.Interface().(json.Marshaler) + if !ok { + e.WriteString("null") + return + } + b, err := m.MarshalJSON() + if err == nil { + // copy JSON into buffer, checking validity. + err = json.Compact(&e.Buffer, b) + } + if err != nil { + e.error(&MarshalerError{v.Type(), err}) + } +} + +func addrMarshalerEncoder(e *encodeState, v reflect.Value, _ encOpts) { + va := v.Addr() + if va.IsNil() { + e.WriteString("null") + return + } + m := va.Interface().(json.Marshaler) + b, err := m.MarshalJSON() + if err == nil { + // copy JSON into buffer, checking validity. + err = json.Compact(&e.Buffer, b) + } + if err != nil { + e.error(&MarshalerError{v.Type(), err}) + } +} + +func textMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { + if v.Kind() == reflect.Ptr && v.IsNil() { + e.WriteString("null") + return + } + m := v.Interface().(encoding.TextMarshaler) + b, err := m.MarshalText() + if err != nil { + e.error(&MarshalerError{v.Type(), err}) + } + e.stringBytes(b, opts.escapeHTML) +} + +func addrTextMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { + va := v.Addr() + if va.IsNil() { + e.WriteString("null") + return + } + m := va.Interface().(encoding.TextMarshaler) + b, err := m.MarshalText() + if err != nil { + e.error(&MarshalerError{v.Type(), err}) + } + e.stringBytes(b, opts.escapeHTML) +} + +func boolEncoder(e *encodeState, v reflect.Value, opts encOpts) { + if opts.quoted { + e.WriteByte('"') + } + if v.Bool() { + e.WriteString("true") + } else { + e.WriteString("false") + } + if opts.quoted { + e.WriteByte('"') + } +} + +func intEncoder(e *encodeState, v reflect.Value, opts encOpts) { + b := strconv.AppendInt(e.scratch[:0], v.Int(), 10) + if opts.quoted { + e.WriteByte('"') + } + e.Write(b) + if opts.quoted { + e.WriteByte('"') + } +} + +func uintEncoder(e *encodeState, v reflect.Value, opts encOpts) { + b := strconv.AppendUint(e.scratch[:0], v.Uint(), 10) + if opts.quoted { + e.WriteByte('"') + } + e.Write(b) + if opts.quoted { + e.WriteByte('"') + } +} + +type floatEncoder int // number of bits + +func (bits floatEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + f := v.Float() + if math.IsInf(f, 0) || math.IsNaN(f) { + e.error(&UnsupportedValueError{v, strconv.FormatFloat(f, 'g', -1, int(bits))}) + } + + // Convert as if by ES6 number to string conversion. + // This matches most other JSON generators. + // See golang.org/issue/6384 and golang.org/issue/14135. + // Like fmt %g, but the exponent cutoffs are different + // and exponents themselves are not padded to two digits. + b := e.scratch[:0] + abs := math.Abs(f) + fmt := byte('f') + // Note: Must use float32 comparisons for underlying float32 value to get precise cutoffs right. + if abs != 0 { + if bits == 64 && (abs < 1e-6 || abs >= 1e21) || bits == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) { + fmt = 'e' + } + } + b = strconv.AppendFloat(b, f, fmt, -1, int(bits)) + if fmt == 'e' { + // clean up e-09 to e-9 + n := len(b) + if n >= 4 && b[n-4] == 'e' && b[n-3] == '-' && b[n-2] == '0' { + b[n-2] = b[n-1] + b = b[:n-1] + } + } + + if opts.quoted { + e.WriteByte('"') + } + e.Write(b) + if opts.quoted { + e.WriteByte('"') + } +} + +var ( + float32Encoder = (floatEncoder(32)).encode + float64Encoder = (floatEncoder(64)).encode +) + +func stringEncoder(e *encodeState, v reflect.Value, opts encOpts) { + if v.Type() == numberType { + numStr := v.String() + // In Go1.5 the empty string encodes to "0", while this is not a valid number literal + // we keep compatibility so check validity after this. + if numStr == "" { + numStr = "0" // Number's zero-val + } + if !isValidNumber(numStr) { + e.error(fmt.Errorf(tagLabel+": invalid number literal %q", numStr)) + } + e.WriteString(numStr) + return + } + if opts.quoted { + sb, err := Marshal(v.String()) + if err != nil { + e.error(err) + } + e.string(string(sb), opts.escapeHTML) + } else { + e.string(v.String(), opts.escapeHTML) + } +} + +func interfaceEncoder(e *encodeState, v reflect.Value, opts encOpts) { + if v.IsNil() { + e.WriteString("null") + return + } + e.reflectValue(v.Elem(), opts) +} + +func unsupportedTypeEncoder(e *encodeState, v reflect.Value, _ encOpts) { + e.error(&UnsupportedTypeError{v.Type()}) +} + +type structEncoder struct { + fields []field + fieldEncs []encoderFunc +} + +func (se *structEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + e.WriteByte('{') + first := true + for i, f := range se.fields { + fv := fieldByIndex(v, f.index) + if !fv.IsValid() || f.omitEmpty && isEmptyValue(fv) { + continue + } + if first { + first = false + } else { + e.WriteByte(',') + } + e.string(f.name, opts.escapeHTML) + e.WriteByte(':') + opts.quoted = f.quoted + se.fieldEncs[i](e, fv, opts) + } + e.WriteByte('}') +} + +func newStructEncoder(t reflect.Type) encoderFunc { + fields := cachedTypeFields(t) + se := &structEncoder{ + fields: fields, + fieldEncs: make([]encoderFunc, len(fields)), + } + for i, f := range fields { + se.fieldEncs[i] = typeEncoder(typeByIndex(t, f.index)) + } + return se.encode +} + +type mapEncoder struct { + elemEnc encoderFunc +} + +func (me *mapEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + if v.IsNil() { + e.WriteString("null") + return + } + e.WriteByte('{') + + // Extract and sort the keys. + keys := v.MapKeys() + sv := make([]reflectWithString, len(keys)) + for i, v := range keys { + sv[i].v = v + if err := sv[i].resolve(); err != nil { + e.error(&MarshalerError{v.Type(), err}) + } + } + sort.Slice(sv, func(i, j int) bool { return sv[i].s < sv[j].s }) + + for i, kv := range sv { + if i > 0 { + e.WriteByte(',') + } + e.string(kv.s, opts.escapeHTML) + e.WriteByte(':') + me.elemEnc(e, v.MapIndex(kv.v), opts) + } + e.WriteByte('}') +} + +func newMapEncoder(t reflect.Type) encoderFunc { + switch t.Key().Kind() { + case reflect.String, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + default: + if !t.Key().Implements(textMarshalerType) { + return unsupportedTypeEncoder + } + } + me := &mapEncoder{typeEncoder(t.Elem())} + return me.encode +} + +func encodeByteSlice(e *encodeState, v reflect.Value, _ encOpts) { + if v.IsNil() { + e.WriteString("null") + return + } + s := v.Bytes() + e.WriteByte('"') + if len(s) < 1024 { + // for small buffers, using Encode directly is much faster. + dst := make([]byte, base64.StdEncoding.EncodedLen(len(s))) + base64.StdEncoding.Encode(dst, s) + e.Write(dst) + } else { + // for large buffers, avoid unnecessary extra temporary + // buffer space. + enc := base64.NewEncoder(base64.StdEncoding, e) + enc.Write(s) + enc.Close() + } + e.WriteByte('"') +} + +// sliceEncoder just wraps an arrayEncoder, checking to make sure the value isn't nil. +type sliceEncoder struct { + arrayEnc encoderFunc +} + +func (se *sliceEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + if v.IsNil() { + e.WriteString("null") + return + } + se.arrayEnc(e, v, opts) +} + +func newSliceEncoder(t reflect.Type) encoderFunc { + // Byte slices get special treatment; arrays don't. + if t.Elem().Kind() == reflect.Uint8 { + p := reflect.PtrTo(t.Elem()) + if !p.Implements(marshalerType) && !p.Implements(textMarshalerType) { + return encodeByteSlice + } + } + enc := &sliceEncoder{newArrayEncoder(t)} + return enc.encode +} + +type arrayEncoder struct { + elemEnc encoderFunc +} + +func (ae *arrayEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + n := v.Len() + if n == 1 { + ae.elemEnc(e, v.Index(0), opts) + return + } + e.WriteByte('[') + for i := 0; i < n; i++ { + if i > 0 { + e.WriteByte(',') + } + ae.elemEnc(e, v.Index(i), opts) + } + e.WriteByte(']') +} + +func newArrayEncoder(t reflect.Type) encoderFunc { + enc := &arrayEncoder{typeEncoder(t.Elem())} + return enc.encode +} + +type ptrEncoder struct { + elemEnc encoderFunc +} + +func (pe *ptrEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + if v.IsNil() { + e.WriteString("null") + return + } + pe.elemEnc(e, v.Elem(), opts) +} + +func newPtrEncoder(t reflect.Type) encoderFunc { + enc := &ptrEncoder{typeEncoder(t.Elem())} + return enc.encode +} + +type condAddrEncoder struct { + canAddrEnc, elseEnc encoderFunc +} + +func (ce *condAddrEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { + if v.CanAddr() { + ce.canAddrEnc(e, v, opts) + } else { + ce.elseEnc(e, v, opts) + } +} + +// newCondAddrEncoder returns an encoder that checks whether its value +// CanAddr and delegates to canAddrEnc if so, else to elseEnc. +func newCondAddrEncoder(canAddrEnc, elseEnc encoderFunc) encoderFunc { + enc := &condAddrEncoder{canAddrEnc: canAddrEnc, elseEnc: elseEnc} + return enc.encode +} + +func isValidTag(s string) bool { + if s == "" { + return false + } + for _, c := range s { + switch { + case strings.ContainsRune("!#$%&()*+-./:<=>?@[]^_{|}~ ", c): + // Backslash and quote chars are reserved, but + // otherwise any punctuation chars are allowed + // in a tag name. + default: + if !unicode.IsLetter(c) && !unicode.IsDigit(c) { + return false + } + } + } + return true +} + +func fieldByIndex(v reflect.Value, index []int) reflect.Value { + for _, i := range index { + if v.Kind() == reflect.Ptr { + if v.IsNil() { + return reflect.Value{} + } + v = v.Elem() + } + v = v.Field(i) + } + return v +} + +func typeByIndex(t reflect.Type, index []int) reflect.Type { + for _, i := range index { + if t.Kind() == reflect.Ptr { + t = t.Elem() + } + t = t.Field(i).Type + } + return t +} + +type reflectWithString struct { + v reflect.Value + s string +} + +func (w *reflectWithString) resolve() error { + if w.v.Kind() == reflect.String { + w.s = w.v.String() + return nil + } + if tm, ok := w.v.Interface().(encoding.TextMarshaler); ok { + buf, err := tm.MarshalText() + w.s = string(buf) + return err + } + switch w.v.Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + w.s = strconv.FormatInt(w.v.Int(), 10) + return nil + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + w.s = strconv.FormatUint(w.v.Uint(), 10) + return nil + } + panic("unexpected map key type") +} + +// NOTE: keep in sync with stringBytes below. +func (e *encodeState) string(s string, escapeHTML bool) int { + len0 := e.Len() + e.WriteByte('"') + start := 0 + for i := 0; i < len(s); { + if b := s[i]; b < utf8.RuneSelf { + if htmlSafeSet[b] || (!escapeHTML && safeSet[b]) { + i++ + continue + } + if start < i { + e.WriteString(s[start:i]) + } + switch b { + case '\\', '"': + e.WriteByte('\\') + e.WriteByte(b) + case '\n': + e.WriteByte('\\') + e.WriteByte('n') + case '\r': + e.WriteByte('\\') + e.WriteByte('r') + case '\t': + e.WriteByte('\\') + e.WriteByte('t') + default: + // This encodes bytes < 0x20 except for \t, \n and \r. + // If escapeHTML is set, it also escapes <, >, and & + // because they can lead to security holes when + // user-controlled strings are rendered into JSON + // and served to some browsers. + e.WriteString(`\u00`) + e.WriteByte(hex[b>>4]) + e.WriteByte(hex[b&0xF]) + } + i++ + start = i + continue + } + c, size := utf8.DecodeRuneInString(s[i:]) + if c == utf8.RuneError && size == 1 { + if start < i { + e.WriteString(s[start:i]) + } + e.WriteString(`\ufffd`) + i += size + start = i + continue + } + // U+2028 is LINE SEPARATOR. + // U+2029 is PARAGRAPH SEPARATOR. + // They are both technically valid characters in JSON strings, + // but don't work in JSONP, which has to be evaluated as JavaScript, + // and can lead to security holes there. It is valid JSON to + // escape them, so we do so unconditionally. + // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion. + if c == '\u2028' || c == '\u2029' { + if start < i { + e.WriteString(s[start:i]) + } + e.WriteString(`\u202`) + e.WriteByte(hex[c&0xF]) + i += size + start = i + continue + } + i += size + } + if start < len(s) { + e.WriteString(s[start:]) + } + e.WriteByte('"') + return e.Len() - len0 +} + +// NOTE: keep in sync with string above. +func (e *encodeState) stringBytes(s []byte, escapeHTML bool) int { + len0 := e.Len() + e.WriteByte('"') + start := 0 + for i := 0; i < len(s); { + if b := s[i]; b < utf8.RuneSelf { + if htmlSafeSet[b] || (!escapeHTML && safeSet[b]) { + i++ + continue + } + if start < i { + e.Write(s[start:i]) + } + switch b { + case '\\', '"': + e.WriteByte('\\') + e.WriteByte(b) + case '\n': + e.WriteByte('\\') + e.WriteByte('n') + case '\r': + e.WriteByte('\\') + e.WriteByte('r') + case '\t': + e.WriteByte('\\') + e.WriteByte('t') + default: + // This encodes bytes < 0x20 except for \t, \n and \r. + // If escapeHTML is set, it also escapes <, >, and & + // because they can lead to security holes when + // user-controlled strings are rendered into JSON + // and served to some browsers. + e.WriteString(`\u00`) + e.WriteByte(hex[b>>4]) + e.WriteByte(hex[b&0xF]) + } + i++ + start = i + continue + } + c, size := utf8.DecodeRune(s[i:]) + if c == utf8.RuneError && size == 1 { + if start < i { + e.Write(s[start:i]) + } + e.WriteString(`\ufffd`) + i += size + start = i + continue + } + // U+2028 is LINE SEPARATOR. + // U+2029 is PARAGRAPH SEPARATOR. + // They are both technically valid characters in JSON strings, + // but don't work in JSONP, which has to be evaluated as JavaScript, + // and can lead to security holes there. It is valid JSON to + // escape them, so we do so unconditionally. + // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion. + if c == '\u2028' || c == '\u2029' { + if start < i { + e.Write(s[start:i]) + } + e.WriteString(`\u202`) + e.WriteByte(hex[c&0xF]) + i += size + start = i + continue + } + i += size + } + if start < len(s) { + e.Write(s[start:]) + } + e.WriteByte('"') + return e.Len() - len0 +} + +// A field represents a single field found in a struct. +type field struct { + name string + nameBytes []byte // []byte(Name) + equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent + + tag bool + index []int + typ reflect.Type + omitEmpty bool + collapsible bool + quoted bool +} + +func fillField(f field) field { + f.nameBytes = []byte(f.name) + f.equalFold = foldFunc(f.nameBytes) + return f +} + +// byIndex sorts field by index sequence. +type byIndex []field + +func (x byIndex) Len() int { return len(x) } + +func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] } + +func (x byIndex) Less(i, j int) bool { + for k, xik := range x[i].index { + if k >= len(x[j].index) { + return false + } + if xik != x[j].index[k] { + return xik < x[j].index[k] + } + } + return len(x[i].index) < len(x[j].index) +} + +// typeFields returns a list of fields that JSON should recognize for the given type. +// The algorithm is breadth-first search over the set of structs to include - the top struct +// and then any reachable anonymous structs. +func typeFields(t reflect.Type) []field { + // Anonymous fields to explore at the current level and the next. + current := []field{} + next := []field{{typ: t}} + + // Count of queued names for current level and the next. + count := map[reflect.Type]int{} + nextCount := map[reflect.Type]int{} + + // Types already visited at an earlier level. + visited := map[reflect.Type]bool{} + + // Fields found. + var fields []field + + for len(next) > 0 { + current, next = next, current[:0] + count, nextCount = nextCount, map[reflect.Type]int{} + + for _, f := range current { + if visited[f.typ] { + continue + } + visited[f.typ] = true + + // Scan f.typ for fields to include. + for i := 0; i < f.typ.NumField(); i++ { + sf := f.typ.Field(i) + if sf.Anonymous { + t := sf.Type + if t.Kind() == reflect.Ptr { + t = t.Elem() + } + // If embedded, StructField.PkgPath is not a reliable + // indicator of whether the field is exported. + // See https://golang.org/issue/21122 + if !isExported(t.Name()) && t.Kind() != reflect.Struct { + // Ignore embedded fields of unexported non-struct types. + // Do not ignore embedded fields of unexported struct types + // since they may have exported fields. + continue + } + } else if sf.PkgPath != "" { + // Ignore unexported non-embedded fields. + continue + } + tag := sf.Tag.Get(tagLabel) + if tag == "-" { + continue + } + if myTag, ok := LoadTag(sf.Tag); ok { + if myTag.NoMarshal { + continue + } + } + name, opts := parseTag(tag) + if !isValidTag(name) { + name = "" + } + index := make([]int, len(f.index)+1) + copy(index, f.index) + index[len(f.index)] = i + + ft := sf.Type + if ft.Name() == "" && ft.Kind() == reflect.Ptr { + // Follow pointer. + ft = ft.Elem() + } + + // Only strings, floats, integers, and booleans can be quoted. + quoted := false + if opts.Contains("string") { + switch ft.Kind() { + case reflect.Bool, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, + reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Float32, reflect.Float64, + reflect.String: + quoted = true + } + } + + // Record found field and index sequence. + if name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct { + tagged := name != "" + if name == "" { + name = sf.Name + } + fields = append(fields, fillField(field{ + name: name, + tag: tagged, + index: index, + typ: ft, + omitEmpty: opts.Contains(tagOmitEmpty), + collapsible: opts.Contains(tagCollapsible), + quoted: quoted, + })) + if count[f.typ] > 1 { + // If there were multiple instances, add a second, + // so that the annihilation code will see a duplicate. + // It only cares about the distinction between 1 or 2, + // so don't bother generating any more copies. + fields = append(fields, fields[len(fields)-1]) + } + continue + } + + // Record new anonymous struct to explore in next round. + nextCount[ft]++ + if nextCount[ft] == 1 { + next = append(next, fillField(field{name: ft.Name(), index: index, typ: ft})) + } + } + } + } + + sort.Slice(fields, func(i, j int) bool { + x := fields + // sort field by name, breaking ties with depth, then + // breaking ties with "name came from json tag", then + // breaking ties with index sequence. + if x[i].name != x[j].name { + return x[i].name < x[j].name + } + if len(x[i].index) != len(x[j].index) { + return len(x[i].index) < len(x[j].index) + } + if x[i].tag != x[j].tag { + return x[i].tag + } + return byIndex(x).Less(i, j) + }) + + // Delete all fields that are hidden by the Go rules for embedded fields, + // except that fields with JSON tags are promoted. + + // The fields are sorted in primary order of name, secondary order + // of field index length. Loop over names; for each name, delete + // hidden fields by choosing the one dominant field that survives. + out := fields[:0] + for advance, i := 0, 0; i < len(fields); i += advance { + // One iteration per name. + // Find the sequence of fields with the name of this first field. + fi := fields[i] + name := fi.name + for advance = 1; i+advance < len(fields); advance++ { + fj := fields[i+advance] + if fj.name != name { + break + } + } + if advance == 1 { // Only one field with this name + out = append(out, fi) + continue + } + dominant, ok := dominantField(fields[i : i+advance]) + if ok { + out = append(out, dominant) + } + } + + fields = out + sort.Sort(byIndex(fields)) + + return fields +} + +// isExported reports whether the identifier is exported. +func isExported(id string) bool { + r, _ := utf8.DecodeRuneInString(id) + return unicode.IsUpper(r) +} + +// dominantField looks through the fields, all of which are known to +// have the same name, to find the single field that dominates the +// others using Go's embedding rules, modified by the presence of +// JSON tags. If there are multiple top-level fields, the boolean +// will be false: This condition is an error in Go and we skip all +// the fields. +func dominantField(fields []field) (field, bool) { + // The fields are sorted in increasing index-length order. The winner + // must therefore be one with the shortest index length. Drop all + // longer entries, which is easy: just truncate the slice. + length := len(fields[0].index) + tagged := -1 // Index of first tagged field. + for i, f := range fields { + if len(f.index) > length { + fields = fields[:i] + break + } + if f.tag { + if tagged >= 0 { + // Multiple tagged fields at the same level: conflict. + // Return no field. + return field{}, false + } + tagged = i + } + } + if tagged >= 0 { + return fields[tagged], true + } + // All remaining fields have the same length. If there's more than one, + // we have a conflict (two fields named "X" at the same level) and we + // return no field. + if len(fields) > 1 { + return field{}, false + } + return fields[0], true +} + +var fieldCache struct { + value atomic.Value // map[reflect.Type][]field + mu sync.Mutex // used only by writers +} + +// cachedTypeFields is like typeFields but uses a cache to avoid repeated work. +func cachedTypeFields(t reflect.Type) []field { + m, _ := fieldCache.value.Load().(map[reflect.Type][]field) + f := m[t] + if f != nil { + return f + } + + // Compute fields without lock. + // Might duplicate effort but won't hold other computations back. + f = typeFields(t) + if f == nil { + f = []field{} + } + + fieldCache.mu.Lock() + m, _ = fieldCache.value.Load().(map[reflect.Type][]field) + newM := make(map[reflect.Type][]field, len(m)+1) + for k, v := range m { + newM[k] = v + } + newM[t] = f + fieldCache.value.Store(newM) + fieldCache.mu.Unlock() + return f +} diff --git a/encode_test.go b/encode_test.go new file mode 100644 index 0000000..8ca691a --- /dev/null +++ b/encode_test.go @@ -0,0 +1,156 @@ +package jsonld + +import ( + "bytes" + "fmt" + "reflect" + "strings" + "testing" +) + +type mockBase struct { + Id string + Name string + Type string +} + +type mockTypeA struct { + mockBase + PropA string + PropB float32 +} + +func TestMarshal(t *testing.T) { + a := mockTypeA{mockBase{"base_id", "MockObjA", "mock_obj"}, "prop_a", 0.001} + b := mockTypeA{} + + url := "http://www.habarnam.ro" + p := WithContext(IRI(url)) + + var err error + var out []byte + + out, err = p.Marshal(a) + if err != nil { + t.Errorf("%s", err) + } + if !strings.Contains(string(out), string(ContextKw)) { + t.Errorf("Context name not found %q in %s", ContextKw, out) + } + if !strings.Contains(string(out), url) { + t.Errorf("Context url not found %q in %s", url, out) + } + err = Unmarshal(out, &b) + if err != nil { + t.Errorf("%s", err) + } + if a.Id != b.Id { + t.Errorf("Id isn't equal %q expected %q in %s", a.Id, b.Id, out) + } + if a.Name != b.Name { + t.Errorf("Name isn't equal %q expected %q", a.Name, b.Name) + } + if a.Type != b.Type { + t.Errorf("Type isn't equal %q expected %q", a.Type, b.Type) + } + if a.PropA != b.PropA { + t.Errorf("PropA isn't equal %q expected %q", a.PropA, b.PropA) + } + if a.PropB != b.PropB { + t.Errorf("PropB isn't equal %f expected %f", a.PropB, b.PropB) + } +} + +func TestMarshalNullContext(t *testing.T) { + var a = struct { + PropA string + PropB float64 + }{"test", 0.0004} + + outL, errL := Marshal(a) + if errL != nil { + t.Errorf("%s", errL) + } + outJ, errJ := Marshal(a) + if errJ != nil { + t.Errorf("%s", errJ) + } + if !bytes.Equal(outL, outJ) { + t.Errorf("Json output should be equal %q, received %q", outL, outJ) + } +} + +func TestMarshalNullValue(t *testing.T) { + var a = interface{}(nil) + + url := "http://www.habarnam.ro" + p := WithContext(IRI(url)) + outL, errL := p.Marshal(a) + if errL != nil { + t.Errorf("%s", errL) + } + outJ := []byte(fmt.Sprintf(`{"@context":"%s"}`, url)) + if !bytes.Equal(outL, outJ) { + t.Errorf("JsonLD output is wrong: %s, expected %s", outL, outJ) + } +} + +func TestIsEmpty(t *testing.T) { + var a int + if !isEmptyValue(reflect.ValueOf(a)) { + t.Errorf("Invalid empty value %v", a) + } + if !isEmptyValue(reflect.ValueOf(uint(a))) { + t.Errorf("Invalid empty value %v", uint(a)) + } + var b float64 + if !isEmptyValue(reflect.ValueOf(b)) { + t.Errorf("Invalid empty value %v", b) + } + var c string + if !isEmptyValue(reflect.ValueOf(c)) { + t.Errorf("Invalid empty value %s", c) + } + var d []byte + if !isEmptyValue(reflect.ValueOf(d)) { + t.Errorf("Invalid empty value %v", d) + } + var e *interface{} + if !isEmptyValue(reflect.ValueOf(e)) { + t.Errorf("Invalid empty value %v", e) + } + g := false + if !isEmptyValue(reflect.ValueOf(g)) { + t.Errorf("Invalid empty value %v", g) + } + h := true + if isEmptyValue(reflect.ValueOf(h)) { + t.Errorf("Invalid empty value %v", h) + } +} + +func TestWithContext_MarshalJSON(t *testing.T) { + tv := "value_test" + v := struct{ Test string }{Test: tv} + + data, err := WithContext(IRI("http://example.com")).Marshal(v) + if err != nil { + t.Error(err) + } + if !bytes.Contains(data, []byte(ContextKw)) { + t.Errorf("%q not found in %s", ContextKw, data) + } + m := reflect.TypeOf(v) + mv := reflect.ValueOf(v) + for i := 0; i < m.NumField(); i++ { + f := m.Field(i) + v := mv.Field(i) + if !bytes.Contains(data, []byte(f.Name)) { + t.Errorf("%q not found in %s", f.Name, data) + } + if !bytes.Contains(data, []byte(v.String())) { + t.Errorf("%q not found in %s", v.String(), data) + } + } + +} diff --git a/fold.go b/fold.go new file mode 100644 index 0000000..465406d --- /dev/null +++ b/fold.go @@ -0,0 +1,143 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package jsonld + +import ( + "bytes" + "unicode/utf8" +) + +const ( + caseMask = ^byte(0x20) // Mask to ignore case in ASCII. + kelvin = '\u212a' + smallLongEss = '\u017f' +) + +// foldFunc returns one of four different case folding equivalence +// functions, from most general (and slow) to fastest: +// +// 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8 +// 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S') +// 3) asciiEqualFold, no special, but includes non-letters (including _) +// 4) simpleLetterEqualFold, no specials, no non-letters. +// +// The letters S and K are special because they map to 3 runes, not just 2: +// * S maps to s and to U+017F 'ſ' Latin small letter long s +// * k maps to K and to U+212A 'K' Kelvin sign +// See https://play.golang.org/p/tTxjOc0OGo +// +// The returned function is specialized for matching against s and +// should only be given s. It's not curried for performance reasons. +func foldFunc(s []byte) func(s, t []byte) bool { + nonLetter := false + special := false // special letter + for _, b := range s { + if b >= utf8.RuneSelf { + return bytes.EqualFold + } + upper := b & caseMask + if upper < 'A' || upper > 'Z' { + nonLetter = true + } else if upper == 'K' || upper == 'S' { + // See above for why these letters are special. + special = true + } + } + if special { + return equalFoldRight + } + if nonLetter { + return asciiEqualFold + } + return simpleLetterEqualFold +} + +// equalFoldRight is a specialization of bytes.EqualFold when s is +// known to be all ASCII (including punctuation), but contains an 's', +// 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t. +// See comments on foldFunc. +func equalFoldRight(s, t []byte) bool { + for _, sb := range s { + if len(t) == 0 { + return false + } + tb := t[0] + if tb < utf8.RuneSelf { + if sb != tb { + sbUpper := sb & caseMask + if 'A' <= sbUpper && sbUpper <= 'Z' { + if sbUpper != tb&caseMask { + return false + } + } else { + return false + } + } + t = t[1:] + continue + } + // sb is ASCII and t is not. t must be either kelvin + // sign or long s; sb must be s, S, k, or K. + tr, size := utf8.DecodeRune(t) + switch sb { + case 's', 'S': + if tr != smallLongEss { + return false + } + case 'k', 'K': + if tr != kelvin { + return false + } + default: + return false + } + t = t[size:] + + } + if len(t) > 0 { + return false + } + return true +} + +// asciiEqualFold is a specialization of bytes.EqualFold for use when +// s is all ASCII (but may contain non-letters) and contains no +// special-folding letters. +// See comments on foldFunc. +func asciiEqualFold(s, t []byte) bool { + if len(s) != len(t) { + return false + } + for i, sb := range s { + tb := t[i] + if sb == tb { + continue + } + if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') { + if sb&caseMask != tb&caseMask { + return false + } + } else { + return false + } + } + return true +} + +// simpleLetterEqualFold is a specialization of bytes.EqualFold for +// use when s is all ASCII letters (no underscores, etc) and also +// doesn't contain 'k', 'K', 's', or 'S'. +// See comments on foldFunc. +func simpleLetterEqualFold(s, t []byte) bool { + if len(s) != len(t) { + return false + } + for i, b := range s { + if b&caseMask != t[i]&caseMask { + return false + } + } + return true +} diff --git a/fold_test.go b/fold_test.go new file mode 100644 index 0000000..2fe8592 --- /dev/null +++ b/fold_test.go @@ -0,0 +1,116 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package jsonld + +import ( + "bytes" + "strings" + "testing" + "unicode/utf8" +) + +var foldTests = []struct { + fn func(s, t []byte) bool + s, t string + want bool +}{ + {equalFoldRight, "", "", true}, + {equalFoldRight, "a", "a", true}, + {equalFoldRight, "", "a", false}, + {equalFoldRight, "a", "", false}, + {equalFoldRight, "a", "A", true}, + {equalFoldRight, "AB", "ab", true}, + {equalFoldRight, "AB", "ac", false}, + {equalFoldRight, "sbkKc", "ſbKKc", true}, + {equalFoldRight, "SbKkc", "ſbKKc", true}, + {equalFoldRight, "SbKkc", "ſbKK", false}, + {equalFoldRight, "e", "é", false}, + {equalFoldRight, "s", "S", true}, + + {simpleLetterEqualFold, "", "", true}, + {simpleLetterEqualFold, "abc", "abc", true}, + {simpleLetterEqualFold, "abc", "ABC", true}, + {simpleLetterEqualFold, "abc", "ABCD", false}, + {simpleLetterEqualFold, "abc", "xxx", false}, + + {asciiEqualFold, "a_B", "A_b", true}, + {asciiEqualFold, "aa@", "aa`", false}, // verify 0x40 and 0x60 aren't case-equivalent +} + +func TestFold(t *testing.T) { + for i, tt := range foldTests { + if got := tt.fn([]byte(tt.s), []byte(tt.t)); got != tt.want { + t.Errorf("%d. %q, %q = %v; want %v", i, tt.s, tt.t, got, tt.want) + } + truth := strings.EqualFold(tt.s, tt.t) + if truth != tt.want { + t.Errorf("strings.EqualFold doesn't agree with case %d", i) + } + } +} + +func TestFoldAgainstUnicode(t *testing.T) { + const bufSize = 5 + buf1 := make([]byte, 0, bufSize) + buf2 := make([]byte, 0, bufSize) + var runes []rune + for i := 0x20; i <= 0x7f; i++ { + runes = append(runes, rune(i)) + } + runes = append(runes, kelvin, smallLongEss) + + funcs := []struct { + name string + fold func(s, t []byte) bool + letter bool // must be ASCII letter + simple bool // must be simple ASCII letter (not 'S' or 'K') + }{ + { + name: "equalFoldRight", + fold: equalFoldRight, + }, + { + name: "asciiEqualFold", + fold: asciiEqualFold, + simple: true, + }, + { + name: "simpleLetterEqualFold", + fold: simpleLetterEqualFold, + simple: true, + letter: true, + }, + } + + for _, ff := range funcs { + for _, r := range runes { + if r >= utf8.RuneSelf { + continue + } + if ff.letter && !isASCIILetter(byte(r)) { + continue + } + if ff.simple && (r == 's' || r == 'S' || r == 'k' || r == 'K') { + continue + } + for _, r2 := range runes { + buf1 := append(buf1[:0], 'x') + buf2 := append(buf2[:0], 'x') + buf1 = buf1[:1+utf8.EncodeRune(buf1[1:bufSize], r)] + buf2 = buf2[:1+utf8.EncodeRune(buf2[1:bufSize], r2)] + buf1 = append(buf1, 'x') + buf2 = append(buf2, 'x') + want := bytes.EqualFold(buf1, buf2) + if got := ff.fold(buf1, buf2); got != want { + t.Errorf("%s(%q, %q) = %v; want %v", ff.name, buf1, buf2, got, want) + } + } + } + } +} + +func isASCIILetter(b byte) bool { + return ('A' <= b && b <= 'Z') || ('a' <= b && b <= 'z') +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..4240e57 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/go-ap/jsonld + +go 1.13 diff --git a/scanner.go b/scanner.go new file mode 100644 index 0000000..ce02ac8 --- /dev/null +++ b/scanner.go @@ -0,0 +1,632 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package jsonld + +// JSON value parser state machine. +// Just about at the limit of what is reasonable to write by hand. +// Some parts are a bit tedious, but overall it nicely factors out the +// otherwise common code from the multiple scanning functions +// in this package (Compact, Indent, checkValid, nextValue, etc). +// +// This file starts with two simple examples using the scanner +// before diving into the scanner itself. + +import ( + "strconv" +) + +// Valid reports whether data is a valid JSON encoding. +func Valid(data []byte) bool { + return checkValid(data, &scanner{}) == nil +} + +// checkValid verifies that data is valid JSON-encoded data. +// scan is passed in for use by checkValid to avoid an allocation. +func checkValid(data []byte, scan *scanner) error { + scan.reset() + for _, c := range data { + scan.bytes++ + if scan.step(scan, c) == scanError { + return scan.err + } + } + if scan.eof() == scanError { + return scan.err + } + return nil +} + +// nextValue splits data after the next whole JSON value, +// returning that value and the bytes that follow it as separate slices. +// scan is passed in for use by nextValue to avoid an allocation. +func nextValue(data []byte, scan *scanner) (value, rest []byte, err error) { + scan.reset() + for i, c := range data { + v := scan.step(scan, c) + if v >= scanEndObject { + switch v { + // probe the scanner with a space to determine whether we will + // get scanEnd on the next character. Otherwise, if the next character + // is not a space, scanEndTop allocates a needless error. + case scanEndObject, scanEndArray: + if scan.step(scan, ' ') == scanEnd { + return data[:i+1], data[i+1:], nil + } + case scanError: + return nil, nil, scan.err + case scanEnd: + return data[:i], data[i:], nil + } + } + } + if scan.eof() == scanError { + return nil, nil, scan.err + } + return data, nil, nil +} + +// A SyntaxError is a description of a JSON syntax error. +type SyntaxError struct { + msg string // description of error + Offset int64 // error occurred after reading Offset bytes +} + +func (e *SyntaxError) Error() string { return e.msg } + +// A scanner is a JSON scanning state machine. +// Callers call scan.reset() and then pass bytes in one at a time +// by calling scan.step(&scan, c) for each byte. +// The return value, referred to as an opcode, tells the +// caller about significant parsing events like beginning +// and ending literals, objects, and arrays, so that the +// caller can follow along if it wishes. +// The return value scanEnd indicates that a single top-level +// JSON value has been completed, *before* the byte that +// just got passed in. (The indication must be delayed in order +// to recognize the end of numbers: is 123 a whole value or +// the beginning of 12345e+6?). +type scanner struct { + // The step is a func to be called to execute the next transition. + // Also tried using an integer constant and a single func + // with a switch, but using the func directly was 10% faster + // on a 64-bit Mac Mini, and it's nicer to read. + step func(*scanner, byte) int + + // Reached end of top-level value. + endTop bool + + // Stack of what we're in the middle of - array values, object keys, object values. + parseState []int + + // Error that happened, if any. + err error + + // 1-byte redo (see undo method) + redo bool + redoCode int + redoState func(*scanner, byte) int + + // total bytes consumed, updated by decoder.Decode + bytes int64 +} + +// These values are returned by the state transition functions +// assigned to scanner.state and the method scanner.eof. +// They give details about the current state of the scan that +// callers might be interested to know about. +// It is okay to Ignore the return value of any particular +// call to scanner.state: if one call returns scanError, +// every subsequent call will return scanError too. +const ( + // Continue. + scanContinue = iota // uninteresting byte + scanBeginLiteral // end implied by next result != scanContinue + scanBeginObject // begin object + scanObjectKey // just finished object key (string) + scanObjectValue // just finished non-last object value + scanEndObject // end object (implies scanObjectValue if possible) + scanBeginArray // begin array + scanArrayValue // just finished array value + scanEndArray // end array (implies scanArrayValue if possible) + scanSkipSpace // space byte; can skip; known to be last "continue" result + + // Stop. + scanEnd // top-level value ended *before* this byte; known to be first "stop" result + scanError // hit an error, scanner.err. + + scanFindType // need to find the "jsonld:type" element of the object. +) + +// These values are stored in the parseState stack. +// They give the current state of a composite value +// being scanned. If the parser is inside a nested value +// the parseState describes the nested state, outermost at entry 0. +const ( + parseObjectKey = iota // parsing object key (before colon) + parseObjectValue // parsing object value (after colon) + parseArrayValue // parsing array value +) + +// reset prepares the scanner for use. +// It must be called before calling s.step. +func (s *scanner) reset() { + s.step = stateBeginValue + s.parseState = s.parseState[0:0] + s.err = nil + s.redo = false + s.endTop = false +} + +// eof tells the scanner that the end of input has been reached. +// It returns a scan status just as s.step does. +func (s *scanner) eof() int { + if s.err != nil { + return scanError + } + if s.endTop { + return scanEnd + } + s.step(s, ' ') + if s.endTop { + return scanEnd + } + if s.err == nil { + s.err = &SyntaxError{"unexpected end of JSON input", s.bytes} + } + return scanError +} + +// pushParseState pushes a new parse state p onto the parse stack. +func (s *scanner) pushParseState(p int) { + s.parseState = append(s.parseState, p) +} + +// popParseState pops a parse state (already obtained) off the stack +// and updates s.step accordingly. +func (s *scanner) popParseState() { + n := len(s.parseState) - 1 + s.parseState = s.parseState[0:n] + s.redo = false + if n == 0 { + s.step = stateEndTop + s.endTop = true + } else { + s.step = stateEndValue + } +} + +func isSpace(c byte) bool { + return c == ' ' || c == '\t' || c == '\r' || c == '\n' +} + +// stateBeginValueOrEmpty is the state after reading `[`. +func stateBeginValueOrEmpty(s *scanner, c byte) int { + if c <= ' ' && isSpace(c) { + return scanSkipSpace + } + if c == ']' { + return stateEndValue(s, c) + } + return stateBeginValue(s, c) +} + +// stateBeginValue is the state at the beginning of the input. +func stateBeginValue(s *scanner, c byte) int { + if c <= ' ' && isSpace(c) { + return scanSkipSpace + } + switch c { + case '{': + s.step = stateBeginStringOrEmpty + s.pushParseState(parseObjectKey) + return scanBeginObject + case '[': + s.step = stateBeginValueOrEmpty + s.pushParseState(parseArrayValue) + return scanBeginArray + case '"': + s.step = stateInString + return scanBeginLiteral + case '-': + s.step = stateNeg + return scanBeginLiteral + case '0': // beginning of 0.123 + s.step = state0 + return scanBeginLiteral + case 't': // beginning of true + s.step = stateT + return scanBeginLiteral + case 'f': // beginning of false + s.step = stateF + return scanBeginLiteral + case 'n': // beginning of null + s.step = stateN + return scanBeginLiteral + } + if '1' <= c && c <= '9' { // beginning of 1234.5 + s.step = state1 + return scanBeginLiteral + } + return s.error(c, "looking for beginning of value") +} + +// stateBeginStringOrEmpty is the state after reading `{`. +func stateBeginStringOrEmpty(s *scanner, c byte) int { + if c <= ' ' && isSpace(c) { + return scanSkipSpace + } + if c == '}' { + n := len(s.parseState) + s.parseState[n-1] = parseObjectValue + return stateEndValue(s, c) + } + return stateBeginString(s, c) +} + +// stateBeginString is the state after reading `{"key": value,`. +func stateBeginString(s *scanner, c byte) int { + if c <= ' ' && isSpace(c) { + return scanSkipSpace + } + if c == '"' { + s.step = stateInString + return scanBeginLiteral + } + return s.error(c, "looking for beginning of object key string") +} + +// stateEndValue is the state after completing a value, +// such as after reading `{}` or `true` or `["x"`. +func stateEndValue(s *scanner, c byte) int { + n := len(s.parseState) + if n == 0 { + // Completed top-level before the current byte. + s.step = stateEndTop + s.endTop = true + return stateEndTop(s, c) + } + if c <= ' ' && isSpace(c) { + s.step = stateEndValue + return scanSkipSpace + } + ps := s.parseState[n-1] + switch ps { + case parseObjectKey: + if c == ':' { + s.parseState[n-1] = parseObjectValue + s.step = stateBeginValue + return scanObjectKey + } + return s.error(c, "after object key") + case parseObjectValue: + if c == ',' { + s.parseState[n-1] = parseObjectKey + s.step = stateBeginString + return scanObjectValue + } + if c == '}' { + s.popParseState() + return scanEndObject + } + return s.error(c, "after object key:value pair") + case parseArrayValue: + if c == ',' { + s.step = stateBeginValue + return scanArrayValue + } + if c == ']' { + s.popParseState() + return scanEndArray + } + return s.error(c, "after array element") + } + return s.error(c, "") +} + +// stateEndTop is the state after finishing the top-level value, +// such as after reading `{}` or `[1,2,3]`. +// Only space characters should be seen now. +func stateEndTop(s *scanner, c byte) int { + if c != ' ' && c != '\t' && c != '\r' && c != '\n' { + // Complain about non-space byte on next call. + s.error(c, "after top-level value") + } + return scanEnd +} + +// stateInString is the state after reading `"`. +func stateInString(s *scanner, c byte) int { + if c == '"' { + s.step = stateEndValue + return scanContinue + } + if c == '\\' { + s.step = stateInStringEsc + return scanContinue + } + if c < 0x20 { + return s.error(c, "in string literal") + } + return scanContinue +} + +// stateInStringEsc is the state after reading `"\` during a quoted string. +func stateInStringEsc(s *scanner, c byte) int { + switch c { + case 'b', 'f', 'n', 'r', 't', '\\', '/', '"': + s.step = stateInString + return scanContinue + case 'u': + s.step = stateInStringEscU + return scanContinue + } + return s.error(c, "in string escape code") +} + +// stateInStringEscU is the state after reading `"\u` during a quoted string. +func stateInStringEscU(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU1 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU1 is the state after reading `"\u1` during a quoted string. +func stateInStringEscU1(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU12 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU12 is the state after reading `"\u12` during a quoted string. +func stateInStringEscU12(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInStringEscU123 + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateInStringEscU123 is the state after reading `"\u123` during a quoted string. +func stateInStringEscU123(s *scanner, c byte) int { + if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { + s.step = stateInString + return scanContinue + } + // numbers + return s.error(c, "in \\u hexadecimal character escape") +} + +// stateNeg is the state after reading `-` during a number. +func stateNeg(s *scanner, c byte) int { + if c == '0' { + s.step = state0 + return scanContinue + } + if '1' <= c && c <= '9' { + s.step = state1 + return scanContinue + } + return s.error(c, "in numeric literal") +} + +// state1 is the state after reading a non-zero integer during a number, +// such as after reading `1` or `100` but not `0`. +func state1(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + s.step = state1 + return scanContinue + } + return state0(s, c) +} + +// state0 is the state after reading `0` during a number. +func state0(s *scanner, c byte) int { + if c == '.' { + s.step = stateDot + return scanContinue + } + if c == 'e' || c == 'E' { + s.step = stateE + return scanContinue + } + return stateEndValue(s, c) +} + +// stateDot is the state after reading the integer and decimal point in a number, +// such as after reading `1.`. +func stateDot(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + s.step = stateDot0 + return scanContinue + } + return s.error(c, "after decimal point in numeric literal") +} + +// stateDot0 is the state after reading the integer, decimal point, and subsequent +// digits of a number, such as after reading `3.14`. +func stateDot0(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + return scanContinue + } + if c == 'e' || c == 'E' { + s.step = stateE + return scanContinue + } + return stateEndValue(s, c) +} + +// stateE is the state after reading the mantissa and e in a number, +// such as after reading `314e` or `0.314e`. +func stateE(s *scanner, c byte) int { + if c == '+' || c == '-' { + s.step = stateESign + return scanContinue + } + return stateESign(s, c) +} + +// stateESign is the state after reading the mantissa, e, and sign in a number, +// such as after reading `314e-` or `0.314e+`. +func stateESign(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + s.step = stateE0 + return scanContinue + } + return s.error(c, "in exponent of numeric literal") +} + +// stateE0 is the state after reading the mantissa, e, optional sign, +// and at least one digit of the exponent in a number, +// such as after reading `314e-2` or `0.314e+1` or `3.14e0`. +func stateE0(s *scanner, c byte) int { + if '0' <= c && c <= '9' { + return scanContinue + } + return stateEndValue(s, c) +} + +// stateT is the state after reading `t`. +func stateT(s *scanner, c byte) int { + if c == 'r' { + s.step = stateTr + return scanContinue + } + return s.error(c, "in literal true (expecting 'r')") +} + +// stateTr is the state after reading `tr`. +func stateTr(s *scanner, c byte) int { + if c == 'u' { + s.step = stateTru + return scanContinue + } + return s.error(c, "in literal true (expecting 'u')") +} + +// stateTru is the state after reading `tru`. +func stateTru(s *scanner, c byte) int { + if c == 'e' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal true (expecting 'e')") +} + +// stateF is the state after reading `f`. +func stateF(s *scanner, c byte) int { + if c == 'a' { + s.step = stateFa + return scanContinue + } + return s.error(c, "in literal false (expecting 'a')") +} + +// stateFa is the state after reading `fa`. +func stateFa(s *scanner, c byte) int { + if c == 'l' { + s.step = stateFal + return scanContinue + } + return s.error(c, "in literal false (expecting 'l')") +} + +// stateFal is the state after reading `fal`. +func stateFal(s *scanner, c byte) int { + if c == 's' { + s.step = stateFals + return scanContinue + } + return s.error(c, "in literal false (expecting 's')") +} + +// stateFals is the state after reading `fals`. +func stateFals(s *scanner, c byte) int { + if c == 'e' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal false (expecting 'e')") +} + +// stateN is the state after reading `n`. +func stateN(s *scanner, c byte) int { + if c == 'u' { + s.step = stateNu + return scanContinue + } + return s.error(c, "in literal null (expecting 'u')") +} + +// stateNu is the state after reading `nu`. +func stateNu(s *scanner, c byte) int { + if c == 'l' { + s.step = stateNul + return scanContinue + } + return s.error(c, "in literal null (expecting 'l')") +} + +// stateNul is the state after reading `nul`. +func stateNul(s *scanner, c byte) int { + if c == 'l' { + s.step = stateEndValue + return scanContinue + } + return s.error(c, "in literal null (expecting 'l')") +} + +// stateError is the state after reaching a syntax error, +// such as after reading `[1}` or `5.1.2`. +func stateError(s *scanner, c byte) int { + return scanError +} + +// error records an error and switches to the error state. +func (s *scanner) error(c byte, context string) int { + s.step = stateError + s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes} + return scanError +} + +// quoteChar formats c as a quoted character literal +func quoteChar(c byte) string { + // special cases - different from quoted strings + if c == '\'' { + return `'\''` + } + if c == '"' { + return `'"'` + } + + // use quoted string with different quotation marks + s := strconv.Quote(string(c)) + return "'" + s[1:len(s)-1] + "'" +} + +// undo causes the scanner to return scanCode from the next state transition. +// This gives callers a simple 1-byte undo mechanism. +func (s *scanner) undo(scanCode int) { + if s.redo { + panic("json: invalid use of scanner") + } + s.redoCode = scanCode + s.redoState = s.step + s.step = stateRedo + s.redo = true +} + +// stateRedo helps implement the scanner's 1-byte undo. +func stateRedo(s *scanner, c byte) int { + s.redo = false + s.step = s.redoState + return s.redoCode +} diff --git a/scanner_test.go b/scanner_test.go new file mode 100644 index 0000000..946e885 --- /dev/null +++ b/scanner_test.go @@ -0,0 +1,29 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package jsonld + +import ( + "testing" +) + +var validTests = []struct { + data string + ok bool +}{ + {`foo`, false}, + {`}{`, false}, + {`{]`, false}, + {`{}`, true}, + {`{"foo":"bar"}`, true}, + {`{"foo":"bar","bar":{"baz":["qux"]}}`, true}, +} + +func TestValid(t *testing.T) { + for _, tt := range validTests { + if ok := Valid([]byte(tt.data)); ok != tt.ok { + t.Errorf("Valid(%#q) = %v, want %v", tt.data, ok, tt.ok) + } + } +} diff --git a/tables.go b/tables.go new file mode 100644 index 0000000..8e65072 --- /dev/null +++ b/tables.go @@ -0,0 +1,218 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package jsonld + +import "unicode/utf8" + +// safeSet holds the value true if the ASCII character with the given array +// position can be represented inside a JSON string without any further +// escaping. +// +// All values are true except for the ASCII control characters (0-31), the +// double quote ("), and the backslash character ("\"). +var safeSet = [utf8.RuneSelf]bool{ + ' ': true, + '!': true, + '"': false, + '#': true, + '$': true, + '%': true, + '&': true, + '\'': true, + '(': true, + ')': true, + '*': true, + '+': true, + ',': true, + '-': true, + '.': true, + '/': true, + '0': true, + '1': true, + '2': true, + '3': true, + '4': true, + '5': true, + '6': true, + '7': true, + '8': true, + '9': true, + ':': true, + ';': true, + '<': true, + '=': true, + '>': true, + '?': true, + '@': true, + 'A': true, + 'B': true, + 'C': true, + 'D': true, + 'E': true, + 'F': true, + 'G': true, + 'H': true, + 'I': true, + 'J': true, + 'K': true, + 'L': true, + 'M': true, + 'N': true, + 'O': true, + 'P': true, + 'Q': true, + 'R': true, + 'S': true, + 'T': true, + 'U': true, + 'V': true, + 'W': true, + 'X': true, + 'Y': true, + 'Z': true, + '[': true, + '\\': false, + ']': true, + '^': true, + '_': true, + '`': true, + 'a': true, + 'b': true, + 'c': true, + 'd': true, + 'e': true, + 'f': true, + 'g': true, + 'h': true, + 'i': true, + 'j': true, + 'k': true, + 'l': true, + 'm': true, + 'n': true, + 'o': true, + 'p': true, + 'q': true, + 'r': true, + 's': true, + 't': true, + 'u': true, + 'v': true, + 'w': true, + 'x': true, + 'y': true, + 'z': true, + '{': true, + '|': true, + '}': true, + '~': true, + '\u007f': true, +} + +// htmlSafeSet holds the value true if the ASCII character with the given +// array position can be safely represented inside a JSON string, embedded +// inside of HTML