// Copyright 2015 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. //go:generate go run genarsc.go //go:generate stringer -output binres_string.go -type ResType,DataType // Package binres implements encoding and decoding of android binary resources. // // Binary resource structs support unmarshalling the binary output of aapt. // Implementations of marshalling for each struct must produce the exact input // sent to unmarshalling. This allows tests to validate each struct representation // of the binary format as follows: // // - unmarshal the output of aapt // - marshal the struct representation // - perform byte-to-byte comparison with aapt output per chunk header and body // // This process should strive to make structs idiomatic to make parsing xml text // into structs trivial. // // Once the struct representation is validated, tests for parsing xml text // into structs can become self-referential as the following holds true: // // - the unmarshalled input of aapt output is the only valid target // - the unmarshalled input of xml text may be compared to the unmarshalled // input of aapt output to identify errors, e.g. text-trims, wrong flags, etc // // This provides validation, byte-for-byte, for producing binary xml resources. // // It should be made clear that unmarshalling binary resources is currently only // in scope for proving that the BinaryMarshaler works correctly. Any other use // is currently out of scope. // // A simple view of binary xml document structure: // // XML // Pool // Map // Namespace // [...node] // // Additional resources: // https://android.googlesource.com/platform/frameworks/base/+/master/libs/androidfw/include/androidfw/ResourceTypes.h // https://justanapplication.wordpress.com/2011/09/13/ (a series of articles, increment date) package binres import ( "encoding" "encoding/binary" "encoding/xml" "fmt" "io" "sort" "strconv" "strings" "unicode" ) func errWrongType(have ResType, want ...ResType) error { return fmt.Errorf("wrong resource type %s, want one of %v", have, want) } type ResType uint16 func (t ResType) IsSupported() bool { return t != ResNull } // explicitly defined for clarity and resolvability with apt source const ( ResNull ResType = 0x0000 ResStringPool ResType = 0x0001 ResTable ResType = 0x0002 ResXML ResType = 0x0003 ResXMLStartNamespace ResType = 0x0100 ResXMLEndNamespace ResType = 0x0101 ResXMLStartElement ResType = 0x0102 ResXMLEndElement ResType = 0x0103 ResXMLCharData ResType = 0x0104 ResXMLResourceMap ResType = 0x0180 ResTablePackage ResType = 0x0200 ResTableType ResType = 0x0201 ResTableTypeSpec ResType = 0x0202 ResTableLibrary ResType = 0x0203 ResTableOverlayable ResType = 0x0204 ResTableOverlayablePolicy ResType = 0x0205 ResTableStagedAlias ResType = 0x0206 ) var ( btou16 = binary.LittleEndian.Uint16 btou32 = binary.LittleEndian.Uint32 putu16 = binary.LittleEndian.PutUint16 putu32 = binary.LittleEndian.PutUint32 ) // unmarshaler wraps BinaryUnmarshaler to provide byte size of decoded chunks. type unmarshaler interface { encoding.BinaryUnmarshaler // size returns the byte size unmarshalled after a call to // UnmarshalBinary, or otherwise zero. size() int } // chunkHeader appears at the front of every data chunk in a resource. type chunkHeader struct { // Type of data that follows this header. typ ResType // Advance slice index by this value to find its associated data, if any. headerByteSize uint16 // This is the header size plus the size of any data associated with the chunk. // Advance slice index by this value to completely skip its contents, including // any child chunks. If this value is the same as headerByteSize, there is // no data associated with the chunk. byteSize uint32 } // size implements unmarshaler. func (hdr chunkHeader) size() int { return int(hdr.byteSize) } func (hdr *chunkHeader) UnmarshalBinary(bin []byte) error { hdr.typ = ResType(btou16(bin)) if !hdr.typ.IsSupported() { return fmt.Errorf("%s not supported", hdr.typ) } hdr.headerByteSize = btou16(bin[2:]) hdr.byteSize = btou32(bin[4:]) if len(bin) < int(hdr.byteSize) { return fmt.Errorf("too few bytes to unmarshal chunk body, have %v, need at-least %v", len(bin), hdr.byteSize) } return nil } func (hdr chunkHeader) MarshalBinary() ([]byte, error) { if !hdr.typ.IsSupported() { return nil, fmt.Errorf("%s not supported", hdr.typ) } bin := make([]byte, 8) putu16(bin, uint16(hdr.typ)) putu16(bin[2:], hdr.headerByteSize) putu32(bin[4:], hdr.byteSize) return bin, nil } type XML struct { chunkHeader Pool *Pool Map *Map Namespace *Namespace Children []*Element // tmp field used when unmarshalling binary stack []*Element } // RawValueByName returns the original raw string value of first matching element attribute, or error if not exists. // Given then RawValueByName("manifest", xml.Name{Local: "package"}) returns "VAL". func (bx *XML) RawValueByName(elname string, attrname xml.Name) (string, error) { elref, err := bx.Pool.RefByName(elname) if err != nil { return "", err } nref, err := bx.Pool.RefByName(attrname.Local) if err != nil { return "", err } nsref := PoolRef(NoEntry) if attrname.Space != "" { nsref, err = bx.Pool.RefByName(attrname.Space) if err != nil { return "", err } } for el := range bx.iterElements() { if el.Name == elref { for _, attr := range el.attrs { // TODO enforce TypedValue DataString constraint? if nsref == attr.NS && nref == attr.Name { return bx.Pool.strings[int(attr.RawValue)], nil } } } } return "", fmt.Errorf("no matching element %q for attribute %+v found", elname, attrname) } const ( androidSchema = "http://schemas.android.com/apk/res/android" toolsSchema = "http://schemas.android.com/tools" ) // skipSynthesize is set true for tests to avoid synthesis of additional nodes and attributes. var skipSynthesize bool // UnmarshalXML decodes an AndroidManifest.xml document returning type XML // containing decoded resources. func UnmarshalXML(r io.Reader, withIcon bool) (*XML, error) { tbl, err := OpenTable() if err != nil { return nil, err } lr := &lineReader{r: r} dec := xml.NewDecoder(lr) bx := new(XML) // temporary pool to resolve real poolref later pool := new(Pool) type ltoken struct { xml.Token line int } var q []ltoken for { line := lr.line(dec.InputOffset()) tkn, err := dec.Token() if err != nil { if err == io.EOF { break } return nil, err } tkn = xml.CopyToken(tkn) switch tkn := tkn.(type) { case xml.StartElement: switch tkn.Name.Local { default: q = append(q, ltoken{tkn, line}) case "uses-sdk": return nil, fmt.Errorf("manual declaration of uses-sdk in AndroidManifest.xml not supported") case "manifest": // synthesize additional attributes and nodes for use during encode. tkn.Attr = append(tkn.Attr, xml.Attr{ Name: xml.Name{ Space: "", Local: "platformBuildVersionCode", }, Value: "16", }, xml.Attr{ Name: xml.Name{ Space: "", Local: "platformBuildVersionName", }, Value: "4.1.2-1425332", }) q = append(q, ltoken{tkn, line}) if !skipSynthesize { s := xml.StartElement{ Name: xml.Name{ Space: "", Local: "uses-sdk", }, Attr: []xml.Attr{ xml.Attr{ Name: xml.Name{ Space: androidSchema, Local: "minSdkVersion", }, Value: fmt.Sprintf("%v", MinSDK), }, }, } e := xml.EndElement{Name: xml.Name{Local: "uses-sdk"}} q = append(q, ltoken{s, line}, ltoken{e, line}) } case "application": if !skipSynthesize { for _, attr := range tkn.Attr { if attr.Name.Space == androidSchema && attr.Name.Local == "icon" { return nil, fmt.Errorf("manual declaration of android:icon in AndroidManifest.xml not supported") } } if withIcon { tkn.Attr = append(tkn.Attr, xml.Attr{ Name: xml.Name{ Space: androidSchema, Local: "icon", }, Value: "@mipmap/icon", }) } } q = append(q, ltoken{tkn, line}) } default: q = append(q, ltoken{tkn, line}) } } for _, ltkn := range q { tkn, line := ltkn.Token, ltkn.line switch tkn := tkn.(type) { case xml.StartElement: el := &Element{ NodeHeader: NodeHeader{ LineNumber: uint32(line), Comment: 0xFFFFFFFF, }, NS: NoEntry, Name: pool.ref(tkn.Name.Local), } if len(bx.stack) == 0 { bx.Children = append(bx.Children, el) } else { n := len(bx.stack) var p *Element p, bx.stack = bx.stack[n-1], bx.stack[:n-1] p.Children = append(p.Children, el) bx.stack = append(bx.stack, p) } bx.stack = append(bx.stack, el) for _, attr := range tkn.Attr { if (attr.Name.Space == "xmlns" && attr.Name.Local == "tools") || attr.Name.Space == toolsSchema { continue // TODO can tbl be queried for schemas to determine validity instead? } if attr.Name.Space == "xmlns" && attr.Name.Local == "android" { if bx.Namespace != nil { return nil, fmt.Errorf("multiple declarations of xmlns:android encountered") } bx.Namespace = &Namespace{ NodeHeader: NodeHeader{ LineNumber: uint32(line), Comment: NoEntry, }, prefix: 0, uri: 0, } continue } nattr := &Attribute{ NS: pool.ref(attr.Name.Space), Name: pool.ref(attr.Name.Local), RawValue: NoEntry, } el.attrs = append(el.attrs, nattr) if attr.Name.Space == "" { nattr.NS = NoEntry // TODO it's unclear how to query these switch attr.Name.Local { case "platformBuildVersionCode": nattr.TypedValue.Type = DataIntDec i, err := strconv.Atoi(attr.Value) if err != nil { return nil, err } nattr.TypedValue.Value = uint32(i) default: // "package", "platformBuildVersionName", and any invalid nattr.RawValue = pool.ref(attr.Value) nattr.TypedValue.Type = DataString } } else { // get type spec and value data type ref, err := tbl.RefByName("attr/" + attr.Name.Local) if err != nil { return nil, err } nt, err := ref.Resolve(tbl) if err != nil { return nil, err } if len(nt.values) == 0 { panic("encountered empty values slice") } if len(nt.values) == 1 { val := nt.values[0] if val.data.Type != DataIntDec { panic("TODO only know how to handle DataIntDec type here") } t := DataType(val.data.Value) switch t { case DataString, DataAttribute, DataType(0x3e): // TODO identify 0x3e, in bootstrap.xml this is the native lib name nattr.RawValue = pool.ref(attr.Value) nattr.TypedValue.Type = DataString nattr.TypedValue.Value = uint32(nattr.RawValue) case DataIntBool, DataType(0x08): nattr.TypedValue.Type = DataIntBool switch attr.Value { case "true": nattr.TypedValue.Value = 0xFFFFFFFF case "false": nattr.TypedValue.Value = 0 default: return nil, fmt.Errorf("invalid bool value %q", attr.Value) } case DataIntDec, DataFloat, DataFraction: // TODO DataFraction needs it's own case statement. minSdkVersion identifies as DataFraction // but has accepted input in the past such as android:minSdkVersion="L" // Other use-cases for DataFraction are currently unknown as applicable to manifest generation // but this provides minimum support for writing out minSdkVersion="15" correctly. nattr.TypedValue.Type = DataIntDec i, err := strconv.Atoi(attr.Value) if err != nil { return nil, err } nattr.TypedValue.Value = uint32(i) case DataReference: nattr.TypedValue.Type = DataReference dref, err := tbl.RefByName(attr.Value) if err != nil { if strings.HasPrefix(attr.Value, "@mipmap") { // firstDrawableId is a TableRef matching first entry of mipmap spec initialized by NewMipmapTable. // 7f is default package, 02 is mipmap spec, 0000 is first entry; e.g. R.drawable.icon // TODO resource table should generate ids as required. const firstDrawableId = 0x7f020000 nattr.TypedValue.Value = firstDrawableId continue } return nil, err } nattr.TypedValue.Value = uint32(dref) default: return nil, fmt.Errorf("unhandled data type %0#2x: %s", uint8(t), t) } } else { // 0x01000000 is an unknown ref that doesn't point to anything, typically // located at the start of entry value lists, peek at last value to determine type. t := nt.values[len(nt.values)-1].data.Type switch t { case DataIntDec: for _, val := range nt.values { if val.name == 0x01000000 { continue } nr, err := val.name.Resolve(tbl) if err != nil { return nil, err } if attr.Value == nr.key.Resolve(tbl.pkgs[0].keyPool) { // TODO hard-coded pkg ref nattr.TypedValue = *val.data break } } case DataIntHex: nattr.TypedValue.Type = t for _, x := range strings.Split(attr.Value, "|") { for _, val := range nt.values { if val.name == 0x01000000 { continue } nr, err := val.name.Resolve(tbl) if err != nil { return nil, err } if x == nr.key.Resolve(tbl.pkgs[0].keyPool) { // TODO hard-coded pkg ref nattr.TypedValue.Value |= val.data.Value break } } } default: return nil, fmt.Errorf("unhandled data type for configuration %0#2x: %s", uint8(t), t) } } } } case xml.CharData: if s := poolTrim(string(tkn)); s != "" { cdt := &CharData{ NodeHeader: NodeHeader{ LineNumber: uint32(line), Comment: NoEntry, }, RawData: pool.ref(s), } el := bx.stack[len(bx.stack)-1] if el.head == nil { el.head = cdt } else if el.tail == nil { el.tail = cdt } else { return nil, fmt.Errorf("element head and tail already contain chardata") } } case xml.EndElement: if tkn.Name.Local == "manifest" { bx.Namespace.end = &Namespace{ NodeHeader: NodeHeader{ LineNumber: uint32(line), Comment: NoEntry, }, prefix: 0, uri: 0, } } n := len(bx.stack) var el *Element el, bx.stack = bx.stack[n-1], bx.stack[:n-1] if el.end != nil { return nil, fmt.Errorf("element end already exists") } el.end = &ElementEnd{ NodeHeader: NodeHeader{ LineNumber: uint32(line), Comment: NoEntry, }, NS: el.NS, Name: el.Name, } case xml.Comment, xml.ProcInst: // discard default: panic(fmt.Errorf("unhandled token type: %T %+v", tkn, tkn)) } } // pools appear to be sorted as follows: // * attribute names prefixed with android: // * "android", [schema-url], [empty-string] // * for each node: // * attribute names with no prefix // * node name // * attribute value if data type of name is DataString, DataAttribute, or 0x3e (an unknown) bx.Pool = new(Pool) var arecurse func(*Element) arecurse = func(el *Element) { for _, attr := range el.attrs { if attr.NS == NoEntry { continue } if attr.NS.Resolve(pool) == androidSchema { bx.Pool.strings = append(bx.Pool.strings, attr.Name.Resolve(pool)) } } for _, child := range el.Children { arecurse(child) } } for _, el := range bx.Children { arecurse(el) } // TODO encoding/xml does not enforce namespace prefix and manifest encoding in aapt // appears to ignore all other prefixes. Inserting this manually is not strictly correct // for the general case, but the effort to do otherwise currently offers nothing. bx.Pool.strings = append(bx.Pool.strings, "android", androidSchema) // there always appears to be an empty string located after schema, even if one is // not present in manifest. bx.Pool.strings = append(bx.Pool.strings, "") var brecurse func(*Element) brecurse = func(el *Element) { for _, attr := range el.attrs { if attr.NS == NoEntry { bx.Pool.strings = append(bx.Pool.strings, attr.Name.Resolve(pool)) } } bx.Pool.strings = append(bx.Pool.strings, el.Name.Resolve(pool)) for _, attr := range el.attrs { if attr.RawValue != NoEntry { bx.Pool.strings = append(bx.Pool.strings, attr.RawValue.Resolve(pool)) } else if attr.NS == NoEntry { bx.Pool.strings = append(bx.Pool.strings, fmt.Sprintf("%+v", attr.TypedValue.Value)) } } if el.head != nil { bx.Pool.strings = append(bx.Pool.strings, el.head.RawData.Resolve(pool)) } if el.tail != nil { bx.Pool.strings = append(bx.Pool.strings, el.tail.RawData.Resolve(pool)) } for _, child := range el.Children { brecurse(child) } } for _, el := range bx.Children { brecurse(el) } // do not eliminate duplicates until the entire slice has been composed. // consider // all attribute names come first followed by values; in such a case, the value "label" // would be a reference to the same "android:label" in the string pool which will occur // within the beginning of the pool where other attr names are located. bx.Pool.strings = asSet(bx.Pool.strings) // TODO consider cases of multiple declarations of the same attr name that should return error // before ever reaching this point. bx.Map = new(Map) for _, s := range bx.Pool.strings { ref, err := tbl.RefByName("attr/" + s) if err != nil { break // break after first non-ref as all strings after are also non-refs. } bx.Map.rs = append(bx.Map.rs, ref) } // resolve tmp pool refs to final pool refs // TODO drop this in favor of sort directly on Table var resolve func(el *Element) resolve = func(el *Element) { if el.NS != NoEntry { el.NS = bx.Pool.ref(el.NS.Resolve(pool)) el.end.NS = el.NS } el.Name = bx.Pool.ref(el.Name.Resolve(pool)) el.end.Name = el.Name for _, attr := range el.attrs { if attr.NS != NoEntry { attr.NS = bx.Pool.ref(attr.NS.Resolve(pool)) } attr.Name = bx.Pool.ref(attr.Name.Resolve(pool)) if attr.RawValue != NoEntry { attr.RawValue = bx.Pool.ref(attr.RawValue.Resolve(pool)) if attr.TypedValue.Type == DataString { attr.TypedValue.Value = uint32(attr.RawValue) } } } for _, child := range el.Children { resolve(child) } } for _, el := range bx.Children { resolve(el) } var asort func(*Element) asort = func(el *Element) { sort.Sort(byType(el.attrs)) sort.Sort(byNamespace(el.attrs)) sort.Sort(byName(el.attrs)) for _, child := range el.Children { asort(child) } } for _, el := range bx.Children { asort(el) } for i, s := range bx.Pool.strings { switch s { case androidSchema: bx.Namespace.uri = PoolRef(i) bx.Namespace.end.uri = PoolRef(i) case "android": bx.Namespace.prefix = PoolRef(i) bx.Namespace.end.prefix = PoolRef(i) } } return bx, nil } // UnmarshalBinary decodes all resource chunks in buf returning any error encountered. func (bx *XML) UnmarshalBinary(buf []byte) error { if err := (&bx.chunkHeader).UnmarshalBinary(buf); err != nil { return err } buf = buf[8:] for len(buf) > 0 { k, err := bx.unmarshalBinaryKind(buf) if err != nil { return err } buf = buf[k.size():] } return nil } // unmarshalBinaryKind decodes and stores the first resource chunk of bin. // It returns the unmarshaler interface and any error encountered. // If k.size() < len(bin), subsequent chunks can be decoded at bin[k.size():]. func (bx *XML) unmarshalBinaryKind(bin []byte) (k unmarshaler, err error) { k, err = bx.kind(ResType(btou16(bin))) if err != nil { return nil, err } if err = k.UnmarshalBinary(bin); err != nil { return nil, err } return k, nil } func (bx *XML) kind(t ResType) (unmarshaler, error) { switch t { case ResStringPool: if bx.Pool != nil { return nil, fmt.Errorf("pool already exists") } bx.Pool = new(Pool) return bx.Pool, nil case ResXMLResourceMap: if bx.Map != nil { return nil, fmt.Errorf("resource map already exists") } bx.Map = new(Map) return bx.Map, nil case ResXMLStartNamespace: if bx.Namespace != nil { return nil, fmt.Errorf("namespace start already exists") } bx.Namespace = new(Namespace) return bx.Namespace, nil case ResXMLEndNamespace: if bx.Namespace.end != nil { return nil, fmt.Errorf("namespace end already exists") } bx.Namespace.end = new(Namespace) return bx.Namespace.end, nil case ResXMLStartElement: el := new(Element) if len(bx.stack) == 0 { bx.Children = append(bx.Children, el) } else { n := len(bx.stack) var p *Element p, bx.stack = bx.stack[n-1], bx.stack[:n-1] p.Children = append(p.Children, el) bx.stack = append(bx.stack, p) } bx.stack = append(bx.stack, el) return el, nil case ResXMLEndElement: n := len(bx.stack) var el *Element el, bx.stack = bx.stack[n-1], bx.stack[:n-1] if el.end != nil { return nil, fmt.Errorf("element end already exists") } el.end = new(ElementEnd) return el.end, nil case ResXMLCharData: // TODO assure correctness cdt := new(CharData) el := bx.stack[len(bx.stack)-1] if el.head == nil { el.head = cdt } else if el.tail == nil { el.tail = cdt } else { return nil, fmt.Errorf("element head and tail already contain chardata") } return cdt, nil default: return nil, fmt.Errorf("unexpected type %s", t) } } func (bx *XML) MarshalBinary() ([]byte, error) { bx.typ = ResXML bx.headerByteSize = 8 var ( bin, b []byte err error ) b, err = bx.chunkHeader.MarshalBinary() if err != nil { return nil, err } bin = append(bin, b...) b, err = bx.Pool.MarshalBinary() if err != nil { return nil, err } bin = append(bin, b...) b, err = bx.Map.MarshalBinary() if err != nil { return nil, err } bin = append(bin, b...) b, err = bx.Namespace.MarshalBinary() if err != nil { return nil, err } bin = append(bin, b...) for _, child := range bx.Children { if err := marshalRecurse(child, &bin); err != nil { return nil, err } } b, err = bx.Namespace.end.MarshalBinary() if err != nil { return nil, err } bin = append(bin, b...) putu32(bin[4:], uint32(len(bin))) return bin, nil } func marshalRecurse(el *Element, bin *[]byte) error { b, err := el.MarshalBinary() if err != nil { return err } *bin = append(*bin, b...) if el.head != nil { b, err := el.head.MarshalBinary() if err != nil { return err } *bin = append(*bin, b...) } for _, child := range el.Children { if err := marshalRecurse(child, bin); err != nil { return err } } b, err = el.end.MarshalBinary() if err != nil { return err } *bin = append(*bin, b...) return nil } func (bx *XML) iterElements() <-chan *Element { ch := make(chan *Element, 1) go func() { for _, el := range bx.Children { iterElementsRecurse(el, ch) } close(ch) }() return ch } func iterElementsRecurse(el *Element, ch chan *Element) { ch <- el for _, e := range el.Children { iterElementsRecurse(e, ch) } } // asSet returns a set from a slice of strings. func asSet(xs []string) []string { m := make(map[string]bool) fo := xs[:0] for _, x := range xs { if !m[x] { m[x] = true fo = append(fo, x) } } return fo } // poolTrim trims all but immediately surrounding space. // \n\t\tfoobar\n\t\t becomes \tfoobar\n func poolTrim(s string) string { var start, end int for i, r := range s { if !unicode.IsSpace(r) { if i != 0 { start = i - 1 // preserve preceding space } break } } for i := len(s) - 1; i >= 0; i-- { r := rune(s[i]) if !unicode.IsSpace(r) { if i != len(s)-1 { end = i + 2 } break } } if start == 0 && end == 0 { return "" // every char was a space } return s[start:end] } // byNamespace sorts attributes based on string pool position of namespace. // Given that "android" always proceeds "" in the pool, this results in the // correct ordering of attributes. type byNamespace []*Attribute func (a byNamespace) Len() int { return len(a) } func (a byNamespace) Less(i, j int) bool { return a[i].NS < a[j].NS } func (a byNamespace) Swap(i, j int) { a[i], a[j] = a[j], a[i] } // byType sorts attributes by the uint8 value of the type. type byType []*Attribute func (a byType) Len() int { return len(a) } func (a byType) Less(i, j int) bool { return a[i].TypedValue.Type < a[j].TypedValue.Type } func (a byType) Swap(i, j int) { a[i], a[j] = a[j], a[i] } // byName sorts attributes that have matching types based on string pool position of name. type byName []*Attribute func (a byName) Len() int { return len(a) } func (a byName) Less(i, j int) bool { return (a[i].TypedValue.Type == DataString || a[i].TypedValue.Type == DataIntDec) && (a[j].TypedValue.Type == DataString || a[j].TypedValue.Type == DataIntDec) && a[i].Name < a[j].Name } func (a byName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } type lineReader struct { off int64 lines []int64 r io.Reader } func (r *lineReader) Read(p []byte) (n int, err error) { n, err = r.r.Read(p) for i := 0; i < n; i++ { if p[i] == '\n' { r.lines = append(r.lines, r.off+int64(i)) } } r.off += int64(n) return n, err } func (r *lineReader) line(pos int64) int { return sort.Search(len(r.lines), func(i int) bool { return pos < r.lines[i] }) + 1 }