1
0
Fork 0

Adding upstream version 3.5.1.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-05-18 18:07:37 +02:00
parent e37d4622a7
commit 097626e61a
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
57 changed files with 6023 additions and 0 deletions

12
.github/FUNDING.yml vendored Normal file
View file

@ -0,0 +1,12 @@
# These are supported funding model platforms
github: [mholt] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']

41
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file
View file

@ -0,0 +1,41 @@
---
name: Bug report
about: For behaviors which violate documentation or cause incorrect results
title: ''
labels: ''
assignees: ''
---
<!--
This template is for bug reports! (If your issue doesn't fit this template, it's probably a feature request instead.)
To fill out this template, simply replace these comments with your answers.
Please do not skip questions; this will slow down the resolution process.
-->
## What version of the package or command are you using?
<!-- A commit sha or tag is fine -->
## What are you trying to do?
<!-- Please describe clearly what you are trying to do thoroughly enough so that a reader with no context can repeat the same process. -->
## What steps did you take?
<!-- Explain exactly how we can reproduce this bug; attach sample archive files if relevant -->
## What did you expect to happen, and what actually happened instead?
<!-- Please make it clear what the bug actually is -->
## How do you think this should be fixed?
<!-- Being specific by linking to lines of code and even suggesting changes will yield fastest resolution -->
## Please link to any related issues, pull requests, and/or discussion
<!-- This will help add crucial context to your report -->
## Bonus: What do you use archiver for, and do you find it useful?
<!-- We'd like to know! -->

View file

@ -0,0 +1,28 @@
---
name: Generic feature request
about: Suggest an idea for this project
title: ''
labels: feature request
assignees: ''
---
<!--
This issue template is for feature requests! If you are reporting a bug instead, please switch templates.
To fill this out, simply replace these comments with your answers.
-->
## What would you like to have changed?
<!-- Describe the feature or enhancement you are requesting -->
## Why is this feature a useful, necessary, and/or important addition to this project?
<!-- Please justify why this change adds value to the project, considering the added maintenance burden and complexity the change introduces -->
## What alternatives are there, or what are you doing in the meantime to work around the lack of this feature?
<!-- We want to get an idea of what is being done in practice, or how other projects support your feature -->
## Please link to any relevant issues, pull requests, or other discussions.
<!-- This adds crucial context to your feature request and can speed things up -->

View file

@ -0,0 +1,32 @@
---
name: New format request
about: Request a new archival or compression format
title: ''
labels: ''
assignees: ''
---
<!--
This template is specifically for adding support for a new archive or compression format to the library. Please, precisely one format per issue.
To fill this out, replace these comments with your answers.
-->
## Introduce the format you are requesting.
<!-- What is it called, what is it used for, etc? Some background information. -->
## What do YOU use this format for?
<!-- We want to know YOUR specific use cases; why do YOU need this format? -->
## What is the format's conventional file extension(s)?
<!-- Don't overthink this one, it's a simple question. -->
## Please link to the format's formal or official specification(s).
<!-- If there isn't a formal spec, link to the most official documentation for the format. Note that unstandardized formats are less likely to be added unless it is in high-enough demand. -->
## Which Go libraries could be used to implement this format?
<!-- This project itself does not actually implement low-level format reading and writing algorithms, so link to pure-Go libraries that do. Dependencies that use cgo or invoke external commands are not eligible for this project. -->

26
.github/workflows/macos-latest.yml vendored Normal file
View file

@ -0,0 +1,26 @@
name: Macos-latest
on: [push, pull_request]
jobs:
build-and-test:
strategy:
matrix:
go-version: [1.13, 1.17]
runs-on: macos-latest
steps:
- name: Install Go
uses: actions/setup-go@v2
with:
go-version: ${{ matrix.go-version }}
- name: Checkout code
uses: actions/checkout@v2
- name: Build
run: go build cmd/arc/main.go
- name: Test
run: go test -v ./...

26
.github/workflows/ubuntu-latest.yml vendored Normal file
View file

@ -0,0 +1,26 @@
name: Ubuntu-latest
on: [push, pull_request]
jobs:
build-and-test:
strategy:
matrix:
go-version: [1.13, 1.17]
runs-on: ubuntu-latest
steps:
- name: Install Go
uses: actions/setup-go@v2
with:
go-version: ${{ matrix.go-version }}
- name: Checkout code
uses: actions/checkout@v2
- name: Build
run: go build cmd/arc/main.go
- name: Test
run: go test -v ./...

26
.github/workflows/windows-latest.yml vendored Normal file
View file

@ -0,0 +1,26 @@
name: Windows-latest
on: [push, pull_request]
jobs:
build-and-test:
strategy:
matrix:
go-version: [1.13, 1.17]
runs-on: windows-latest
steps:
- name: Install Go
uses: actions/setup-go@v2
with:
go-version: ${{ matrix.go-version }}
- name: Checkout code
uses: actions/checkout@v2
- name: Build
run: go build cmd/arc/main.go
- name: Test
run: go test -v ./...

10
.gitignore vendored Normal file
View file

@ -0,0 +1,10 @@
/arc
/cmd/arc/arc
/dist/
/vendor/
.DS_Store
_gitignore
builds/
*.test
.*.sw*

41
.goreleaser.yml Normal file
View file

@ -0,0 +1,41 @@
# This is an example goreleaser.yaml file with some sane defaults.
# Make sure to check the documentation at http://goreleaser.com
project_name: arc
before:
hooks:
# You may remove this if you don't use go modules.
- go mod download
# you may remove this if you don't need go generate
- go generate ./...
builds:
-
env:
- CGO_ENABLED=0
main: ./cmd/arc
goos:
- linux
- windows
- darwin
goarch:
- 386
- amd64
- arm
- arm64
goarm:
- 6
- 7
archives:
-
format: binary
replacements:
darwin: mac
checksum:
name_template: 'checksums.txt'
snapshot:
name_template: "{{ .Tag }}-next"
changelog:
sort: asc
filters:
exclude:
- '^docs:'
- '^test:'

4
.prettierrc Normal file
View file

@ -0,0 +1,4 @@
{
"bracketSpacing": true,
"printWidth": 120,
}

21
LICENSE Normal file
View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2016 Matthew Holt
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

324
README.md Normal file
View file

@ -0,0 +1,324 @@
# archiver [![archiver GoDoc](https://img.shields.io/badge/reference-godoc-blue.svg?style=flat-square)](https://pkg.go.dev/github.com/mholt/archiver?tab=doc) [![Ubuntu-latest](https://github.com/mholt/archiver/actions/workflows/ubuntu-latest.yml/badge.svg)](https://github.com/mholt/archiver/actions/workflows/ubuntu-latest.yml) [![Macos-latest](https://github.com/mholt/archiver/actions/workflows/macos-latest.yml/badge.svg)](https://github.com/mholt/archiver/actions/workflows/macos-latest.yml) [![Windows-latest](https://github.com/mholt/archiver/actions/workflows/windows-latest.yml/badge.svg)](https://github.com/mholt/archiver/actions/workflows/windows-latest.yml)
Introducing **Archiver 3.1** - a cross-platform, multi-format archive utility and Go library. A powerful and flexible library meets an elegant CLI in this generic replacement for several platform-specific or format-specific archive utilities.
## Features
Package archiver makes it trivially easy to make and extract common archive formats such as tarball (and its compressed variants) and zip. Simply name the input and output file(s). The `arc` command runs the same on all platforms and has no external dependencies (not even libc). It is powered by the Go standard library and several third-party, pure-Go libraries.
Files are put into the root of the archive; directories are recursively added, preserving structure.
- Make whole archives from a list of files
- Open whole archives to a folder
- Extract specific files/folders from archives
- Stream files in and out of archives without needing actual files on disk
- Traverse archive contents without loading them
- Compress files
- Decompress files
- Streaming compression and decompression
- Several archive and compression formats supported
### Format-dependent features
- Gzip is multithreaded
- Optionally create a top-level folder to avoid littering a directory or archive root with files
- Toggle overwrite existing files
- Adjust compression level
- Zip: store (not compress) already-compressed files
- Make all necessary directories
- Open password-protected RAR archives
- Optionally continue with other files after an error
### Supported compression formats
- brotli (br)
- bzip2 (bz2)
- flate (zip)
- gzip (gz)
- lz4
- snappy (sz)
- xz
- zstandard (zstd)
### Supported archive formats
- .zip
- .tar (including any compressed variants like .tar.gz)
- .rar (read-only)
Tar files can optionally be compressed using any of the above compression formats.
## GoDoc
See <https://pkg.go.dev/github.com/mholt/archiver/v3>
## Install
### With webi
[`webi`](https://webinstall.dev/arc) will install `webi` and `arc` to `~/.local/bin/` and update your `PATH`.
#### Mac, Linux, Raspberry Pi
```bash
curl -fsS https://webinstall.dev/arc | bash
```
#### Windows 10
```pwsh
curl.exe -fsS -A MS https://webinstall.dev/arc | powershell
```
### With Go
To install the runnable binary to your \$GOPATH/bin:
```bash
go install github.com/mholt/archiver/v3/cmd/arc@latest
```
### Manually
To install manually
1. Download the binary for your platform from the [Github Releases](https://github.com/mholt/archiver/releases) page.
2. Move the binary to a location in your path, for example:
- without `sudo`:
```bash
chmod a+x ~/Downloads/arc_*
mkdir -p ~/.local/bin
mv ~/Downloads/arc_* ~/.local/bin/arc
```
- as `root`:
```bash
chmod a+x ~/Downloads/arc_*
sudo mkdir -p /usr/local/bin
sudo mv ~/Downloads/arc_* /usr/local/bin/arc
```
3. If needed, update `~/.bashrc` or `~/.profile` to add `arc` to your `PATH`, for example:
```
echo 'PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc
```
## Build from Source
You can successfully build `arc` with just the go tooling, or with `goreleaser`.
### With `go`
```bash
go build cmd/arc/*.go
```
### Multi-platform with `goreleaser`
Builds with `goreleaser` will also include version info.
```bash
goreleaser --snapshot --skip-publish --rm-dist
```
## Command Use
### Make new archive
```bash
# Syntax: arc archive [archive name] [input files...]
arc archive test.tar.gz file1.txt images/file2.jpg folder/subfolder
```
(At least one input file is required.)
### Extract entire archive
```bash
# Syntax: arc unarchive [archive name] [destination]
arc unarchive test.tar.gz
```
(The destination path is optional; default is current directory.)
The archive name must end with a supported file extension&mdash;this is how it knows what kind of archive to make. Run `arc help` for more help.
### List archive contents
```bash
# Syntax: arc ls [archive name]
arc ls caddy_dist.tar.gz
```
```txt
drwxr-xr-x matt staff 0 2018-09-19 15:47:18 -0600 MDT dist/
-rw-r--r-- matt staff 6148 2017-08-07 18:34:22 -0600 MDT dist/.DS_Store
-rw-r--r-- matt staff 22481 2018-09-19 15:47:18 -0600 MDT dist/CHANGES.txt
-rw-r--r-- matt staff 17189 2018-09-19 15:47:18 -0600 MDT dist/EULA.txt
-rw-r--r-- matt staff 25261 2016-03-07 16:32:00 -0700 MST dist/LICENSES.txt
-rw-r--r-- matt staff 1017 2018-09-19 15:47:18 -0600 MDT dist/README.txt
-rw-r--r-- matt staff 288 2016-03-21 11:52:38 -0600 MDT dist/gitcookie.sh.enc
...
```
### Extract a specific file or folder from an archive
```bash
# Syntax: arc extract [archive name] [path in archive] [destination on disk]
arc extract test.tar.gz foo/hello.txt extracted/hello.txt
```
### Compress a single file
```bash
# Syntax: arc compress [input file] [output file]
arc compress test.txt compressed_test.txt.gz
arc compress test.txt gz
```
For convenience, the output file (second argument) may simply be a compression format (without leading dot), in which case the output filename will be the same as the input filename but with the format extension appended, and the input file will be deleted if successful.
### Decompress a single file
```bash
# Syntax: arc decompress [input file] [output file]
arc decompress test.txt.gz original_test.txt
arc decompress test.txt.gz
```
For convenience, the output file (second argument) may be omitted. In that case, the output filename will have the same name as the input filename, but with the compression extension stripped from the end; and the input file will be deleted if successful.
### Flags
Flags are specified before the subcommand. Use `arc help` or `arc -h` to get usage help and a description of flags with their default values.
## Library Use
The archiver package allows you to easily create and open archives, walk their contents, extract specific files, compress and decompress files, and even stream archives in and out using pure io.Reader and io.Writer interfaces, without ever needing to touch the disk.
To use as a dependency in your project:
```bash
go get github.com/mholt/archiver/v3
```
```go
import "github.com/mholt/archiver/v3"
```
[See the package's GoDoc](https://pkg.go.dev/github.com/mholt/archiver?tab=doc) for full API documentation.
For example, creating or unpacking an archive file:
```go
err := archiver.Archive([]string{"testdata", "other/file.txt"}, "test.zip")
// ...
err = archiver.Unarchive("test.tar.gz", "test")
```
The archive format is determined by file extension. (There are [several functions in this package](https://pkg.go.dev/github.com/mholt/archiver?tab=doc) which perform a task by inferring the format from file extension or file header, including `Archive()`, `Unarchive()`, `CompressFile()`, and `DecompressFile()`.)
To configure the archiver or compressor being used, create an instance of the format's type:
```go
z := archiver.Zip{
CompressionLevel: flate.DefaultCompression,
MkdirAll: true,
SelectiveCompression: true,
ContinueOnError: false,
OverwriteExisting: false,
ImplicitTopLevelFolder: false,
}
err := z.Archive([]string{"testdata", "other/file.txt"}, "/Users/matt/Desktop/test.zip")
```
Inspecting an archive:
```go
err = z.Walk("/Users/matt/Desktop/test.zip", func(f archiver.File) error {
zfh, ok := f.Header.(zip.FileHeader)
if ok {
fmt.Println("Filename:", zfh.Name)
}
return nil
})
```
Streaming files into an archive that is being written to the HTTP response:
```go
err = z.Create(responseWriter)
if err != nil {
return err
}
defer z.Close()
for _, fname := range filenames {
info, err := os.Stat(fname)
if err != nil {
return err
}
// get file's name for the inside of the archive
internalName, err := archiver.NameInArchive(info, fname, fname)
if err != nil {
return err
}
// open the file
file, err := os.Open(fname)
if err != nil {
return err
}
// write it to the archive
err = z.Write(archiver.File{
FileInfo: archiver.FileInfo{
FileInfo: info,
CustomName: internalName,
},
ReadCloser: file,
})
file.Close()
if err != nil {
return err
}
}
```
The `archiver.File` type allows you to use actual files with archives, or to mimic files when you only have streams.
There's a lot more that can be done, too. [See the GoDoc](https://pkg.go.dev/github.com/mholt/archiver?tab=doc) for full API documentation.
**Security note: This package does NOT attempt to mitigate zip-slip attacks.** It is [extremely difficult](https://github.com/rubyzip/rubyzip/pull/376) [to do properly](https://github.com/mholt/archiver/pull/65#issuecomment-395988244) and [seemingly impossible to mitigate effectively across platforms](https://github.com/golang/go/issues/20126). [Attempted fixes have broken processing of legitimate files in production](https://github.com/mholt/archiver/pull/70#issuecomment-423267320), rendering the program unusable. Our recommendation instead is to inspect the contents of an untrusted archive before extracting it (this package provides `Walkers`) and decide if you want to proceed with extraction.
## Project Values
This project has a few principle-based goals that guide its development:
- **Do our thing really well.** Our thing is creating, opening, inspecting, compressing, and streaming archive files. It is not meant to be a replacement for specific archive format tools like tar, zip, etc. that have lots of features and customizability. (Some customizability is OK, but not to the extent that it becomes overly complicated or error-prone.)
- **Have good tests.** Changes should be covered by tests.
- **Limit dependencies.** Keep the package lightweight.
- **Pure Go.** This means no cgo or other external/system dependencies. This package should be able to stand on its own and cross-compile easily to any platform -- and that includes its library dependencies.
- **Idiomatic Go.** Keep interfaces small, variable names semantic, vet shows no errors, the linter is generally quiet, etc.
- **Be elegant.** This package should be elegant to use and its code should be elegant when reading and testing. If it doesn't feel good, fix it up.
- **Well-documented.** Use comments prudently; explain why non-obvious code is necessary (and use tests to enforce it). Keep the docs updated, and have examples where helpful.
- **Keep it efficient.** This often means keep it simple. Fast code is valuable.
- **Consensus.** Contributions should ideally be approved by multiple reviewers before being merged. Generally, avoid merging multi-chunk changes that do not go through at least one or two iterations/reviews. Except for trivial changes, PRs are seldom ready to merge right away.
- **Have fun contributing.** Coding is awesome!
We welcome contributions and appreciate your efforts! However, please open issues to discuss any changes before spending the time preparing a pull request. This will save time, reduce frustration, and help coordinate the work. Thank you!

15
SECURITY.md Normal file
View file

@ -0,0 +1,15 @@
# Security Policy
## Supported Versions
| Version | Supported |
| ------- | ------------------ |
| >= 3.x | :white_check_mark: |
| < 3.0 | :x: |
## Reporting a Vulnerability
Please send the details to both of us:
- AJ ONeal <coolaj86@gmail.com>
- Matthew Holt <Matthew.Holt@gmail.com>

540
archiver.go Normal file
View file

@ -0,0 +1,540 @@
// Package archiver facilitates convenient, cross-platform, high-level archival
// and compression operations for a variety of formats and compression algorithms.
//
// This package and its dependencies are written in pure Go (not cgo) and
// have no external dependencies, so they should run on all major platforms.
// (It also comes with a command for CLI use in the cmd/arc folder.)
//
// Each supported format or algorithm has a unique type definition that
// implements the interfaces corresponding to the tasks they perform. For
// example, the Tar type implements Reader, Writer, Archiver, Unarchiver,
// Walker, and several other interfaces.
//
// The most common functions are implemented at the package level for
// convenience: Archive, Unarchive, Walk, Extract, CompressFile, and
// DecompressFile. With these, the format type is chosen implicitly,
// and a sane default configuration is used.
//
// To customize a format's configuration, create an instance of its struct
// with its fields set to the desired values. You can also use and customize
// the handy Default* (replace the wildcard with the format's type name)
// for a quick, one-off instance of the format's type.
//
// To obtain a new instance of a format's struct with the default config, use
// the provided New*() functions. This is not required, however. An empty
// struct of any type, for example &Zip{} is perfectly valid, so you may
// create the structs manually, too. The examples on this page show how
// either may be done.
//
// See the examples in this package for an idea of how to wield this package
// for common tasks. Most of the examples which are specific to a certain
// format type, for example Zip, can be applied to other types that implement
// the same interfaces. For example, using Zip is very similar to using Tar
// or TarGz (etc), and using Gz is very similar to using Sz or Xz (etc).
//
// When creating archives or compressing files using a specific instance of
// the format's type, the name of the output file MUST match that of the
// format, to prevent confusion later on. If you absolutely need a different
// file extension, you may rename the file afterward.
//
// Values in this package are NOT safe for concurrent use. There is no
// performance benefit of reusing them, and since they may contain important
// state (especially while walking, reading, or writing), it is NOT
// recommended to reuse values from this package or change their configuration
// after they are in use.
package archiver
import (
"fmt"
"io"
"os"
"path"
"path/filepath"
"runtime"
"strings"
)
// Archiver is a type that can create an archive file
// from a list of source file names. It also embeds
// ExtensionChecker so callers can verify that a
// destination filename matches the archive format.
type Archiver interface {
	ExtensionChecker

	// Archive adds all the files or folders in sources
	// to an archive to be created at destination. Files
	// are added to the root of the archive, and directories
	// are walked and recursively added, preserving folder
	// structure.
	Archive(sources []string, destination string) error
}
// ExtensionChecker validates file extensions.
type ExtensionChecker interface {
	// CheckExt returns nil if name has a file extension
	// the implementation recognizes, or an error otherwise.
	// (ByExtension uses a nil return to select a format.)
	CheckExt(name string) error
}
// FilenameChecker validates filenames to prevent path traversal attacks.
type FilenameChecker interface {
	// CheckPath returns an error if extracting filename
	// into the destination directory to would be unsafe
	// (e.g. would traverse outside of it).
	CheckPath(to, filename string) error
}
// Unarchiver is a type that can extract archive files
// into a folder.
type Unarchiver interface {
	// Unarchive extracts the archive at source into the
	// destination folder on disk.
	Unarchive(source, destination string) error
}
// Writer can write discrete byte streams of files to
// an output stream.
type Writer interface {
	// Create prepares the Writer to write an archive to out.
	Create(out io.Writer) error
	// Write adds the file f to the archive being written.
	Write(f File) error
	// Close finalizes the archive and releases resources.
	Close() error
}
// Reader can read discrete byte streams of files from
// an input stream.
type Reader interface {
	// Open prepares the Reader to read an archive from in.
	// size is the size of the input stream, in bytes.
	Open(in io.Reader, size int64) error
	// Read returns the next file in the archive.
	Read() (File, error)
	// Close releases resources held by the Reader.
	Close() error
}
// Extractor can extract a specific file from a source
// archive to a specific destination folder on disk.
type Extractor interface {
	// Extract copies only the file or folder named target
	// from the source archive into the destination folder.
	Extract(source, target, destination string) error
}
// File provides methods for accessing information about
// or contents of a file within an archive. The embedded
// os.FileInfo describes the file's metadata.
type File struct {
	os.FileInfo

	// The original header info; depends on
	// type of archive -- could be nil, too.
	Header interface{}

	// Allow the file contents to be read (and closed)
	// through the embedded io.ReadCloser.
	io.ReadCloser
}
// FileInfo is an os.FileInfo but optionally with
// a custom name, useful if dealing with files that
// are not actual files on disk, or which have a
// different name in an archive than on disk.
type FileInfo struct {
	os.FileInfo

	// CustomName, when non-empty, overrides the embedded
	// FileInfo's name; see the Name method.
	CustomName string

	// SourcePath stores the path to the source.
	// Used when reading a symlink.
	SourcePath string
}
// Name returns fi.CustomName if not empty;
// otherwise it returns fi.FileInfo.Name().
func (fi FileInfo) Name() string {
	if fi.CustomName == "" {
		return fi.FileInfo.Name()
	}
	return fi.CustomName
}
// ReadFakeCloser is an io.Reader that has
// a no-op close method to satisfy the
// io.ReadCloser interface. Useful for wrapping
// plain readers where there is nothing to close.
type ReadFakeCloser struct {
	io.Reader
}
// Close implements io.Closer. It is a no-op and always returns nil.
func (rfc ReadFakeCloser) Close() error { return nil }
// Walker can walk an archive file and return information
// about each item in the archive.
type Walker interface {
	// Walk calls walkFn for every item in the archive file.
	Walk(archive string, walkFn WalkFunc) error
}
// WalkFunc is called at each item visited by Walk.
// If an error is returned, the walk may continue
// if the Walker is configured to continue on error.
// The sole exception is the error value ErrStopWalk,
// which stops the walk without an actual error.
type WalkFunc func(f File) error
// ErrStopWalk signals Walk to break without error.
// Return it from a WalkFunc to end the walk early.
var ErrStopWalk = fmt.Errorf("walk stopped")
// ErrFormatNotRecognized is an error that will be
// returned if the file is not a valid archive format.
var ErrFormatNotRecognized = fmt.Errorf("format not recognized")
// Compressor compresses to out what it reads from in.
// It also ensures a compatible or matching file extension
// via the embedded ExtensionChecker.
type Compressor interface {
	ExtensionChecker

	// Compress reads from in and writes the compressed
	// bytes to out.
	Compress(in io.Reader, out io.Writer) error
}
// Decompressor decompresses to out what it reads from in.
type Decompressor interface {
	// Decompress reads compressed bytes from in and writes
	// the decompressed bytes to out.
	Decompress(in io.Reader, out io.Writer) error
}
// Matcher is a type that can return whether the given
// file appears to match the implementation's format.
// Implementations should return the file's read position
// to where it was when the method was called.
type Matcher interface {
	// Match reports whether the stream appears to be in
	// the implementation's format.
	Match(io.ReadSeeker) (bool, error)
}
// Archive creates an archive of the source files to a new file at destination.
// The archive format is chosen implicitly by file extension.
func Archive(sources []string, destination string) error {
	format, err := ByExtension(destination)
	if err != nil {
		return err
	}
	// The chosen format must actually support archiving.
	if arch, ok := format.(Archiver); ok {
		return arch.Archive(sources, destination)
	}
	return fmt.Errorf("format specified by destination filename is not an archive format: %s (%T)", destination, format)
}
// Unarchive unarchives the given archive file into the destination folder.
// The archive format is selected implicitly by the source's file extension.
func Unarchive(source, destination string) error {
	format, err := ByExtension(source)
	if err != nil {
		return err
	}
	// The chosen format must actually support extraction.
	if un, ok := format.(Unarchiver); ok {
		return un.Unarchive(source, destination)
	}
	return fmt.Errorf("format specified by source filename is not an archive format: %s (%T)", source, format)
}
// Walk calls walkFn for each file within the given archive file.
// The archive format is chosen implicitly by file extension.
func Walk(archive string, walkFn WalkFunc) error {
	format, err := ByExtension(archive)
	if err != nil {
		return err
	}
	// The chosen format must support walking its contents.
	if walker, ok := format.(Walker); ok {
		return walker.Walk(archive, walkFn)
	}
	return fmt.Errorf("format specified by archive filename is not a walker format: %s (%T)", archive, format)
}
// Extract extracts a single file from the given source archive. If the target
// is a directory, the entire folder will be extracted into destination. The
// archive format is chosen implicitly by file extension.
func Extract(source, target, destination string) error {
	format, err := ByExtension(source)
	if err != nil {
		return err
	}
	// The chosen format must support extracting single items.
	if ex, ok := format.(Extractor); ok {
		return ex.Extract(source, target, destination)
	}
	return fmt.Errorf("format specified by source filename is not an extractor format: %s (%T)", source, format)
}
// CompressFile is a convenience function to simply compress a file.
// The compression algorithm is selected implicitly based on the
// destination's extension.
func CompressFile(source, destination string) error {
	format, err := ByExtension(destination)
	if err != nil {
		return err
	}
	// The chosen format must be a compression algorithm.
	comp, ok := format.(Compressor)
	if !ok {
		return fmt.Errorf("format specified by destination filename is not a recognized compression algorithm: %s", destination)
	}
	return FileCompressor{Compressor: comp}.CompressFile(source, destination)
}
// DecompressFile is a convenience function to simply decompress a file.
// The decompression algorithm is selected implicitly based on the
// source's extension.
func DecompressFile(source, destination string) error {
	format, err := ByExtension(source)
	if err != nil {
		return err
	}
	// The chosen format must be a compression algorithm.
	dec, ok := format.(Decompressor)
	if !ok {
		return fmt.Errorf("format specified by source filename is not a recognized compression algorithm: %s", source)
	}
	return FileCompressor{Decompressor: dec}.DecompressFile(source, destination)
}
func fileExists(name string) bool {
_, err := os.Stat(name)
return !os.IsNotExist(err)
}
func mkdir(dirPath string, dirMode os.FileMode) error {
err := os.MkdirAll(dirPath, dirMode)
if err != nil {
return fmt.Errorf("%s: making directory: %v", dirPath, err)
}
return nil
}
func writeNewFile(fpath string, in io.Reader, fm os.FileMode) error {
err := os.MkdirAll(filepath.Dir(fpath), 0755)
if err != nil {
return fmt.Errorf("%s: making directory for file: %v", fpath, err)
}
out, err := os.Create(fpath)
if err != nil {
return fmt.Errorf("%s: creating new file: %v", fpath, err)
}
defer out.Close()
err = out.Chmod(fm)
if err != nil && runtime.GOOS != "windows" {
return fmt.Errorf("%s: changing file mode: %v", fpath, err)
}
_, err = io.Copy(out, in)
if err != nil {
return fmt.Errorf("%s: writing file: %v", fpath, err)
}
return nil
}
func writeNewSymbolicLink(fpath string, target string) error {
err := os.MkdirAll(filepath.Dir(fpath), 0755)
if err != nil {
return fmt.Errorf("%s: making directory for file: %v", fpath, err)
}
_, err = os.Lstat(fpath)
if err == nil {
err = os.Remove(fpath)
if err != nil {
return fmt.Errorf("%s: failed to unlink: %+v", fpath, err)
}
}
err = os.Symlink(target, fpath)
if err != nil {
return fmt.Errorf("%s: making symbolic link for: %v", fpath, err)
}
return nil
}
func writeNewHardLink(fpath string, target string) error {
err := os.MkdirAll(filepath.Dir(fpath), 0755)
if err != nil {
return fmt.Errorf("%s: making directory for file: %v", fpath, err)
}
_, err = os.Lstat(fpath)
if err == nil {
err = os.Remove(fpath)
if err != nil {
return fmt.Errorf("%s: failed to unlink: %+v", fpath, err)
}
}
err = os.Link(target, fpath)
if err != nil {
return fmt.Errorf("%s: making hard link for: %v", fpath, err)
}
return nil
}
func isSymlink(fi os.FileInfo) bool {
return fi.Mode()&os.ModeSymlink != 0
}
// within returns true if sub is within or equal to parent.
//
// Bug fix: the previous implementation rejected any relative path
// merely containing ".." as a substring, which wrongly excluded
// legitimate names like "b..c". Since filepath.Rel returns a cleaned
// path, an escape can only appear as a leading ".." component, so
// only that case is rejected now.
func within(parent, sub string) bool {
	rel, err := filepath.Rel(parent, sub)
	if err != nil {
		return false
	}
	// rel is cleaned, so escaping paths are exactly ".." or start
	// with "../" (using the OS separator).
	if rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return false
	}
	return true
}
// multipleTopLevels returns true if the paths do not
// share a common top-level folder.
func multipleTopLevels(paths []string) bool {
	// Zero or one path trivially shares a top level.
	if len(paths) < 2 {
		return false
	}

	var first string
	for _, raw := range paths {
		// Normalize: backslashes to slashes, drop a leading slash.
		top := strings.TrimPrefix(strings.Replace(raw, `\`, "/", -1), "/")

		// Climb to the topmost path component.
		for parent := path.Dir(top); parent != "."; parent = path.Dir(top) {
			top = parent
		}

		if first == "" {
			first = top
		}
		if top != first {
			return true
		}
	}
	return false
}
// folderNameFromFileName returns a name for a folder
// that is suitable based on the filename, which will
// be stripped of its extensions.
//
// Bug fix: previously, a filename beginning with a dot (such as
// ".hidden") yielded an empty string because the first dot was at
// index 0; such names are now returned intact.
func folderNameFromFileName(filename string) string {
	base := filepath.Base(filename)
	// Cut at the first dot so multi-part extensions like
	// ".tar.gz" are removed entirely; index 0 means a leading
	// dot (hidden file), not an extension separator.
	firstDot := strings.Index(base, ".")
	if firstDot > 0 {
		return base[:firstDot]
	}
	return base
}
// makeNameInArchive returns the filename for the file given by fpath to be used within
// the archive. sourceInfo is the FileInfo obtained by calling os.Stat on source, and baseDir
// is an optional base directory that becomes the root of the archive. fpath should be the
// unaltered file path of the file given to a filepath.WalkFunc.
func makeNameInArchive(sourceInfo os.FileInfo, source, baseDir, fpath string) (string, error) {
name := filepath.Base(fpath) // start with the file or dir name
if sourceInfo.IsDir() {
// preserve internal directory structure; that's the path components
// between the source directory's leaf and this file's leaf
dir, err := filepath.Rel(filepath.Dir(source), filepath.Dir(fpath))
if err != nil {
return "", err
}
// prepend the internal directory structure to the leaf name,
// and convert path separators to forward slashes as per spec
name = path.Join(filepath.ToSlash(dir), name)
}
return path.Join(baseDir, name), nil // prepend the base directory
}
// NameInArchive returns a name for the file at fpath suitable for
// the inside of an archive. The source and its associated sourceInfo
// is the path where walking a directory started, and if no directory
// was walked, source may == fpath. The returned name is essentially
// the components of the path between source and fpath, preserving
// the internal directory structure.
func NameInArchive(sourceInfo os.FileInfo, source, fpath string) (string, error) {
	// identical to makeNameInArchive with no base directory prefix
	const noBaseDir = ""
	return makeNameInArchive(sourceInfo, source, noBaseDir, fpath)
}
// ByExtension returns an archiver and unarchiver, or compressor
// and decompressor, based on the extension of the filename.
func ByExtension(filename string) (interface{}, error) {
	// find the first registered format whose extension check passes
	var match interface{}
	for _, checker := range extCheckers {
		if checker.CheckExt(filename) == nil {
			match = checker
			break
		}
	}
	// hand back a fresh default instance of the matched format;
	// the checkers themselves are only used for extension checks
	switch match.(type) {
	case *Rar:
		return NewRar(), nil
	case *Tar:
		return NewTar(), nil
	case *TarBrotli:
		return NewTarBrotli(), nil
	case *TarBz2:
		return NewTarBz2(), nil
	case *TarGz:
		return NewTarGz(), nil
	case *TarLz4:
		return NewTarLz4(), nil
	case *TarSz:
		return NewTarSz(), nil
	case *TarXz:
		return NewTarXz(), nil
	case *TarZstd:
		return NewTarZstd(), nil
	case *Zip:
		return NewZip(), nil
	case *Gz:
		return NewGz(), nil
	case *Bz2:
		return NewBz2(), nil
	case *Lz4:
		return NewLz4(), nil
	case *Snappy:
		return NewSnappy(), nil
	case *Xz:
		return NewXz(), nil
	case *Zstd:
		return NewZstd(), nil
	}
	return nil, fmt.Errorf("format unrecognized by filename: %s", filename)
}
// ByHeader returns the unarchiver value that matches the input's
// file header. It does not affect the current read position.
// If the file's header is not a recognized archive format, then
// ErrFormatNotRecognized will be returned.
func ByHeader(input io.ReadSeeker) (Unarchiver, error) {
	var found Matcher
	for _, candidate := range matchers {
		ok, err := candidate.Match(input)
		if err != nil {
			return nil, fmt.Errorf("matching on format %s: %v", candidate, err)
		}
		if ok {
			found = candidate
			break
		}
	}
	// hand back a fresh default instance of the matched format
	switch found.(type) {
	case *Zip:
		return NewZip(), nil
	case *Tar:
		return NewTar(), nil
	case *Rar:
		return NewRar(), nil
	}
	return nil, ErrFormatNotRecognized
}
// extCheckers is a list of the format implementations
// that can check extensions. Only to be used for
// checking extensions - not any archival operations.
//
// NOTE(review): the order appears significant — the compound tar
// formats are listed before the single-compression formats,
// presumably so that e.g. ".tar.gz" is not claimed by the plain Gz
// checker; confirm against each CheckExt implementation before
// reordering.
var extCheckers = []ExtensionChecker{
	&TarBrotli{},
	&TarBz2{},
	&TarGz{},
	&TarLz4{},
	&TarSz{},
	&TarXz{},
	&TarZstd{},
	&Rar{},
	&Tar{},
	&Zip{},
	&Brotli{},
	&Gz{},
	&Bz2{},
	&Lz4{},
	&Snappy{},
	&Xz{},
	&Zstd{},
}

// matchers lists the formats that can identify themselves by
// reading a stream's header bytes; ByHeader consults these in order.
var matchers = []Matcher{
	&Rar{},
	&Tar{},
	&Zip{},
}

591
archiver_test.go Normal file
View file

@ -0,0 +1,591 @@
package archiver
import (
"bytes"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"testing"
"time"
)
// TestWithin checks the within helper against absolute-path pairs,
// including the prefix-collision case ("/foo" vs "/foobar").
func TestWithin(t *testing.T) {
	for i, tc := range []struct {
		path1, path2 string
		expect       bool
	}{
		{
			path1:  "/foo",
			path2:  "/foo/bar",
			expect: true,
		},
		{
			path1:  "/foo",
			path2:  "/foobar/asdf",
			expect: false,
		},
		{
			path1:  "/foobar/",
			path2:  "/foobar/asdf",
			expect: true,
		},
		{
			path1:  "/foobar/asdf",
			path2:  "/foobar",
			expect: false,
		},
		{
			path1:  "/foobar/asdf",
			path2:  "/foobar/",
			expect: false,
		},
		{
			path1:  "/",
			path2:  "/asdf",
			expect: true,
		},
		{
			// a path is considered within itself
			path1:  "/asdf",
			path2:  "/asdf",
			expect: true,
		},
		{
			path1:  "/",
			path2:  "/",
			expect: true,
		},
		{
			path1:  "/foo/bar/daa",
			path2:  "/foo",
			expect: false,
		},
		{
			path1:  "/foo/",
			path2:  "/foo/bar/daa",
			expect: true,
		},
	} {
		actual := within(tc.path1, tc.path2)
		if actual != tc.expect {
			t.Errorf("Test %d: [%s %s] Expected %t but got %t", i, tc.path1, tc.path2, tc.expect, actual)
		}
	}
}
// TestMultipleTopLevels checks top-level-folder detection for both
// slash-separated and Windows-style (backslash, drive-letter) paths.
func TestMultipleTopLevels(t *testing.T) {
	for i, tc := range []struct {
		set    []string
		expect bool
	}{
		{
			set:    []string{},
			expect: false,
		},
		{
			set:    []string{"/foo"},
			expect: false,
		},
		{
			set:    []string{"/foo", "/foo/bar"},
			expect: false,
		},
		{
			set:    []string{"/foo", "/bar"},
			expect: true,
		},
		{
			// shared prefix but different top-level folders
			set:    []string{"/foo", "/foobar"},
			expect: true,
		},
		{
			set:    []string{"foo", "foo/bar"},
			expect: false,
		},
		{
			// leading slash is normalized away, so these agree
			set:    []string{"foo", "/foo/bar"},
			expect: false,
		},
		{
			set:    []string{"../foo", "foo/bar"},
			expect: true,
		},
		{
			set:    []string{`C:\foo\bar`, `C:\foo\bar\zee`},
			expect: false,
		},
		{
			set:    []string{`C:\`, `C:\foo\bar`},
			expect: false,
		},
		{
			// different drive letters are different top levels
			set:    []string{`D:\foo`, `E:\foo`},
			expect: true,
		},
		{
			set:    []string{`D:\foo`, `D:\foo\bar`, `C:\foo`},
			expect: true,
		},
		{
			set:    []string{"/foo", "/", "/bar"},
			expect: true,
		},
	} {
		actual := multipleTopLevels(tc.set)
		if actual != tc.expect {
			t.Errorf("Test %d: %v: Expected %t but got %t", i, tc.set, tc.expect, actual)
		}
	}
}
// TestMakeNameInArchive checks in-archive name construction for
// single files and walked directories, with and without a base dir.
func TestMakeNameInArchive(t *testing.T) {
	for i, tc := range []struct {
		sourceInfo fakeFileInfo
		source     string // a file path explicitly listed by the user to include in the archive
		baseDir    string // the base or root directory or path within the archive which contains all other files
		fpath      string // the file path being walked; if source is a directory, this will be a child path
		expect     string
	}{
		{
			sourceInfo: fakeFileInfo{isDir: false},
			source:     "foo.txt",
			baseDir:    "",
			fpath:      "foo.txt",
			expect:     "foo.txt",
		},
		{
			sourceInfo: fakeFileInfo{isDir: false},
			source:     "foo.txt",
			baseDir:    "base",
			fpath:      "foo.txt",
			expect:     "base/foo.txt",
		},
		{
			// a single file keeps only its leaf name
			sourceInfo: fakeFileInfo{isDir: false},
			source:     "foo/bar.txt",
			baseDir:    "",
			fpath:      "foo/bar.txt",
			expect:     "bar.txt",
		},
		{
			sourceInfo: fakeFileInfo{isDir: false},
			source:     "foo/bar.txt",
			baseDir:    "base",
			fpath:      "foo/bar.txt",
			expect:     "base/bar.txt",
		},
		{
			sourceInfo: fakeFileInfo{isDir: true},
			source:     "foo/bar",
			baseDir:    "base",
			fpath:      "foo/bar",
			expect:     "base/bar",
		},
		{
			sourceInfo: fakeFileInfo{isDir: false},
			source:     "/absolute/path.txt",
			baseDir:    "",
			fpath:      "/absolute/path.txt",
			expect:     "path.txt",
		},
		{
			sourceInfo: fakeFileInfo{isDir: false},
			source:     "/absolute/sub/path.txt",
			baseDir:    "",
			fpath:      "/absolute/sub/path.txt",
			expect:     "path.txt",
		},
		{
			sourceInfo: fakeFileInfo{isDir: false},
			source:     "/absolute/sub/path.txt",
			baseDir:    "base",
			fpath:      "/absolute/sub/path.txt",
			expect:     "base/path.txt",
		},
		{
			sourceInfo: fakeFileInfo{isDir: false},
			source:     "sub/path.txt",
			baseDir:    "base/subbase",
			fpath:      "sub/path.txt",
			expect:     "base/subbase/path.txt",
		},
		{
			// walking a directory preserves internal structure
			sourceInfo: fakeFileInfo{isDir: true},
			source:     "sub/dir",
			baseDir:    "base/subbase",
			fpath:      "sub/dir/path.txt",
			expect:     "base/subbase/dir/path.txt",
		},
		{
			sourceInfo: fakeFileInfo{isDir: true},
			source:     "sub/dir",
			baseDir:    "base/subbase",
			fpath:      "sub/dir/sub2/sub3/path.txt",
			expect:     "base/subbase/dir/sub2/sub3/path.txt",
		},
		{
			sourceInfo: fakeFileInfo{isDir: true},
			source:     `/absolute/dir`,
			baseDir:    "base",
			fpath:      `/absolute/dir/sub1/sub2/file.txt`,
			expect:     "base/dir/sub1/sub2/file.txt",
		},
	} {
		actual, err := makeNameInArchive(tc.sourceInfo, tc.source, tc.baseDir, tc.fpath)
		if err != nil {
			t.Errorf("Test %d: Got error: %v", i, err)
		}
		if actual != tc.expect {
			t.Errorf("Test %d: Expected '%s' but got '%s'", i, tc.expect, actual)
		}
	}
}
// TODO: We need a new .rar file since we moved the test corpus into the testdata/corpus subfolder.
/*
func TestRarUnarchive(t *testing.T) {
au := DefaultRar
auStr := fmt.Sprintf("%s", au)
tmp, err := ioutil.TempDir("", "archiver_test")
if err != nil {
t.Fatalf("[%s] %v", auStr, err)
}
defer os.RemoveAll(tmp)
dest := filepath.Join(tmp, "extraction_test_"+auStr)
os.Mkdir(dest, 0755)
file := "testdata/sample.rar"
err = au.Unarchive(file, dest)
if err != nil {
t.Fatalf("[%s] extracting archive [%s -> %s]: didn't expect an error, but got: %v", auStr, file, dest, err)
}
// Check that what was extracted is what was compressed
// Extracting links isn't implemented yet (in github.com/nwaples/rardecode lib there are no methods to get symlink info)
// Files access modes may differs on different machines, we are comparing extracted(as archive host) and local git clone
symmetricTest(t, auStr, dest, false, false)
}
*/
// TestArchiveUnarchive round-trips the test corpus through every
// registered format that supports both archiving and unarchiving.
func TestArchiveUnarchive(t *testing.T) {
	for _, format := range archiveFormats {
		au, ok := format.(archiverUnarchiver)
		if !ok {
			t.Errorf("%s (%T): not an Archiver and Unarchiver", format, format)
			continue
		}
		testArchiveUnarchive(t, au)
	}
}
// TestArchiveUnarchiveWithFolderPermissions restricts one corpus
// directory to 0700, runs the full round-trip suite, and then
// restores the directory's original permissions.
func TestArchiveUnarchiveWithFolderPermissions(t *testing.T) {
	dir := "testdata/corpus/proverbs/extra"
	currentPerms, err := os.Stat(dir)
	if err != nil {
		t.Fatalf("%v", err)
	}
	err = os.Chmod(dir, 0700)
	if err != nil {
		t.Fatalf("%v", err)
	}
	// put the original mode back no matter how the suite ends
	defer func() {
		err := os.Chmod(dir, currentPerms.Mode())
		if err != nil {
			t.Fatalf("%v", err)
		}
	}()
	TestArchiveUnarchive(t)
}
// testArchiveUnarchive archives the test corpus with au into a temp
// dir, extracts the result, and verifies the extraction matches the
// corpus exactly (contents, symlinks, and modes).
func testArchiveUnarchive(t *testing.T, au archiverUnarchiver) {
	auStr := fmt.Sprintf("%s", au) // format name, used in messages and file names
	tmp, err := ioutil.TempDir("", "archiver_test")
	if err != nil {
		t.Fatalf("[%s] %v", auStr, err)
	}
	defer os.RemoveAll(tmp)
	// Test creating archive
	outfile := filepath.Join(tmp, "archiver_test."+auStr)
	err = au.Archive([]string{"testdata/corpus"}, outfile)
	if err != nil {
		t.Fatalf("[%s] making archive: didn't expect an error, but got: %v", auStr, err)
	}
	// Test format matching (TODO: Make this its own test, out of band with the archive/unarchive tests)
	//testMatching(t, au, outfile) // TODO: Disabled until we can finish implementing this for compressed tar formats
	// Test extracting archive
	dest := filepath.Join(tmp, "extraction_test_"+auStr)
	_ = os.Mkdir(dest, 0755)
	err = au.Unarchive(outfile, dest)
	if err != nil {
		t.Fatalf("[%s] extracting archive [%s -> %s]: didn't expect an error, but got: %v", auStr, outfile, dest, err)
	}
	// Check that what was extracted is what was compressed
	symmetricTest(t, auStr, dest, true, true)
}
/*
// testMatching tests that au can match the format of archiveFile.
func testMatching(t *testing.T, au archiverUnarchiver, archiveFile string) {
m, ok := au.(Matcher)
if !ok {
t.Logf("[NOTICE] %T (%s) is not a Matcher", au, au)
return
}
file, err := os.Open(archiveFile)
if err != nil {
t.Fatalf("[%s] opening file for matching: %v", au, err)
}
defer file.Close()
tmpBuf := make([]byte, 2048)
io.ReadFull(file, tmpBuf)
matched, err := m.Match(file)
if err != nil {
t.Fatalf("%s (%T): testing matching: got error, expected none: %v", m, m, err)
}
if !matched {
t.Fatalf("%s (%T): format should have matched, but didn't", m, m)
}
}
*/
// symmetricTest compares the contents of a destination directory to the contents
// of the test corpus and tests that they are equal.
// testSymlinks toggles symlink comparison and testModes toggles
// file-mode comparison, since both can vary by platform/format.
func symmetricTest(t *testing.T, formatName, dest string, testSymlinks, testModes bool) {
	// count the files we expect to see in the extraction
	var expectedFileCount int
	_ = filepath.Walk("testdata/corpus", func(fpath string, info os.FileInfo, err error) error {
		if testSymlinks || (info.Mode()&os.ModeSymlink) == 0 {
			expectedFileCount++
		}
		return nil
	})
	// If outputs equals inputs, we're good; traverse output files
	// and compare file names, file contents, and file count.
	var actualFileCount int
	_ = filepath.Walk(dest, func(fpath string, info os.FileInfo, _ error) error {
		if fpath == dest {
			return nil
		}
		if testSymlinks || (info.Mode()&os.ModeSymlink) == 0 {
			actualFileCount++
		}
		// map the extracted path back to its corpus counterpart
		origPath, err := filepath.Rel(dest, fpath)
		if err != nil {
			t.Fatalf("[%s] %s: Error inducing original file path: %v", formatName, fpath, err)
		}
		origPath = filepath.Join("testdata", origPath)
		expectedFileInfo, err := os.Lstat(origPath)
		if err != nil {
			t.Fatalf("[%s] %s: Error obtaining original file info: %v", formatName, fpath, err)
		}
		if !testSymlinks && (expectedFileInfo.Mode()&os.ModeSymlink) != 0 {
			return nil
		}
		actualFileInfo, err := os.Lstat(fpath)
		if err != nil {
			t.Fatalf("[%s] %s: Error obtaining actual file info: %v", formatName, fpath, err)
		}
		if testModes && actualFileInfo.Mode() != expectedFileInfo.Mode() {
			t.Fatalf("[%s] %s: File mode differed between on disk and compressed", formatName,
				expectedFileInfo.Mode().String()+" : "+actualFileInfo.Mode().String())
		}
		if info.IsDir() {
			// stat dir instead of read file
			_, err = os.Stat(origPath)
			if err != nil {
				t.Fatalf("[%s] %s: Couldn't stat original directory (%s): %v", formatName,
					fpath, origPath, err)
			}
			return nil
		}
		// symlinks are compared by target, not by content
		if (actualFileInfo.Mode() & os.ModeSymlink) != 0 {
			expectedLinkTarget, err := os.Readlink(origPath)
			if err != nil {
				t.Fatalf("[%s] %s: Couldn't read original symlink target: %v", formatName, origPath, err)
			}
			actualLinkTarget, err := os.Readlink(fpath)
			if err != nil {
				t.Fatalf("[%s] %s: Couldn't read actual symlink target: %v", formatName, fpath, err)
			}
			if expectedLinkTarget != actualLinkTarget {
				t.Fatalf("[%s] %s: Symlink targets differed between on disk and compressed", formatName, origPath)
			}
			return nil
		}
		// regular files are compared byte-for-byte
		expected, err := ioutil.ReadFile(origPath)
		if err != nil {
			t.Fatalf("[%s] %s: Couldn't open original file (%s) from disk: %v", formatName,
				fpath, origPath, err)
		}
		actual, err := ioutil.ReadFile(fpath)
		if err != nil {
			t.Fatalf("[%s] %s: Couldn't open new file from disk: %v", formatName, fpath, err)
		}
		if !bytes.Equal(expected, actual) {
			t.Fatalf("[%s] %s: File contents differed between on disk and compressed", formatName, origPath)
		}
		return nil
	})
	if got, want := actualFileCount, expectedFileCount; got != want {
		t.Fatalf("[%s] Expected %d resulting files, got %d", formatName, want, got)
	}
}
// TestUnarchiveWithStripComponents verifies that StripComponents
// removes the leading path element when extracting each format.
func TestUnarchiveWithStripComponents(t *testing.T) {
	testArchives := []string{
		"testdata/sample.rar",
		"testdata/testarchives/evilarchives/evil.zip",
		"testdata/testarchives/evilarchives/evil.tar",
		"testdata/testarchives/evilarchives/evil.tar.gz",
		"testdata/testarchives/evilarchives/evil.tar.bz2",
	}
	to := "testdata/testarchives/destarchives/"
	for _, archiveName := range testArchives {
		f, err := ByExtension(archiveName)
		if err != nil {
			t.Error(err)
		}
		var target string
		// BUG FIX: the switch previously had empty cases for *Zip and
		// *TarGz. Go cases do NOT fall through, so those formats were
		// extracted with no configuration and an empty target, which
		// made the Stat assertion below degenerate to checking that
		// the destination directory itself existed. Every case now
		// configures its format and sets a real target.
		switch v := f.(type) {
		case *Rar:
			v.OverwriteExisting = false
			v.ImplicitTopLevelFolder = false
			v.StripComponents = 1
			target = "quote1.txt"
		case *Zip:
			v.OverwriteExisting = false
			v.ImplicitTopLevelFolder = false
			v.StripComponents = 1
			target = "safefile"
		case *Tar:
			v.OverwriteExisting = false
			v.ImplicitTopLevelFolder = false
			v.StripComponents = 1
			target = "safefile"
		case *TarGz:
			v.Tar.OverwriteExisting = false
			v.Tar.ImplicitTopLevelFolder = false
			v.Tar.StripComponents = 1
			target = "safefile"
		case *TarBz2:
			v.Tar.OverwriteExisting = false
			v.Tar.ImplicitTopLevelFolder = false
			v.Tar.StripComponents = 1
			target = "safefile"
		}
		u := f.(Unarchiver)
		// extraction errors are logged, not fatal: the evil archives
		// may legitimately refuse to extract some entries
		if err := u.Unarchive(archiveName, to); err != nil {
			fmt.Println(err)
		}
		// with one component stripped, the target must sit at the
		// destination root
		if _, err := os.Stat(filepath.Join(to, target)); os.IsNotExist(err) {
			t.Errorf("file is incorrectly extracted: %s", target)
		}
		os.RemoveAll(to)
	}
}
// test at runtime if the CheckFilename function is behaving properly for the archive formats
// Each evil archive contains a path-traversal entry ('evilfile') plus
// a benign one ('safedir/safefile'); see CheckFilenames below.
func TestSafeExtraction(t *testing.T) {
	testArchives := []string{
		"testdata/testarchives/evilarchives/evil.zip",
		"testdata/testarchives/evilarchives/evil.tar",
		"testdata/testarchives/evilarchives/evil.tar.gz",
		"testdata/testarchives/evilarchives/evil.tar.bz2",
	}
	for _, archiveName := range testArchives {
		expected := true // 'evilfile' should not be extracted outside of destination directory and 'safefile' should be extracted anyway in the destination folder anyway
		if _, err := os.Stat(archiveName); os.IsNotExist(err) {
			t.Errorf("archive not found")
		}
		actual := CheckFilenames(archiveName)
		if actual != expected {
			t.Errorf("CheckFilename is misbehaving for archive format type %s", filepath.Ext(archiveName))
		}
	}
}
// CheckFilenames extracts archiveName into the test destination and
// reports whether the malicious entry stayed out of the parent
// directory while the benign entry was extracted successfully.
func CheckFilenames(archiveName string) bool {
	// clean the destination folder after this test
	defer os.RemoveAll("testdata/testarchives/destarchives/")
	if err := Unarchive(archiveName, "testdata/testarchives/destarchives/"); err != nil {
		fmt.Println(err)
	}
	// is 'evilfile' prevented to be extracted outside of the destination folder?
	_, err := os.Stat("testdata/testarchives/evilfile")
	evilNotExtracted := os.IsNotExist(err)
	// is 'safefile' safely extracted without errors inside the destination path?
	_, err = os.Stat("testdata/testarchives/destarchives/safedir/safefile")
	safeExtracted := !os.IsNotExist(err)
	return evilNotExtracted && safeExtracted
}
// archiveFormats are the default format instances exercised by the
// round-trip tests. (Rar is absent here: this package only supports
// unarchiving rar files.)
var archiveFormats = []interface{}{
	DefaultZip,
	DefaultTar,
	DefaultTarBrotli,
	DefaultTarBz2,
	DefaultTarGz,
	DefaultTarLz4,
	DefaultTarSz,
	DefaultTarXz,
	DefaultTarZstd,
}

// archiverUnarchiver pairs Archiver and Unarchiver for formats that
// support a full round trip.
type archiverUnarchiver interface {
	Archiver
	Unarchiver
}

// fakeFileInfo is a stub os.FileInfo used to drive makeNameInArchive
// in tests without touching the filesystem.
type fakeFileInfo struct {
	name    string
	size    int64
	mode    os.FileMode
	modTime time.Time
	isDir   bool
	sys     interface{}
}

// os.FileInfo implementation: each accessor just returns the
// corresponding stub field.
func (ffi fakeFileInfo) Name() string       { return ffi.name }
func (ffi fakeFileInfo) Size() int64        { return ffi.size }
func (ffi fakeFileInfo) Mode() os.FileMode  { return ffi.mode }
func (ffi fakeFileInfo) ModTime() time.Time { return ffi.modTime }
func (ffi fakeFileInfo) IsDir() bool        { return ffi.isDir }
func (ffi fakeFileInfo) Sys() interface{}   { return ffi.sys }

55
brotli.go Normal file
View file

@ -0,0 +1,55 @@
package archiver
import (
"fmt"
"io"
"path/filepath"
"github.com/andybalholm/brotli"
)
// Brotli facilitates brotli compression.
type Brotli struct {
	// Quality is the compression level handed to
	// brotli.NewWriterLevel; NewBrotli initializes it to the
	// library's default.
	Quality int
}
// Compress reads in, compresses it, and writes it to out.
func (br *Brotli) Compress(in io.Reader, out io.Writer) error {
	w := brotli.NewWriterLevel(out, br.Quality)
	if _, err := io.Copy(w, in); err != nil {
		w.Close() // best effort; the copy error takes precedence
		return err
	}
	// Close flushes any buffered data; its error must be returned
	// (the previous deferred Close discarded it, so a truncated
	// stream could be reported as success).
	return w.Close()
}
// Decompress reads in, decompresses it, and writes it to out.
func (br *Brotli) Decompress(in io.Reader, out io.Writer) error {
	_, err := io.Copy(out, brotli.NewReader(in))
	return err
}
// CheckExt ensures the file extension matches the format.
func (br *Brotli) CheckExt(filename string) error {
	if ext := filepath.Ext(filename); ext != ".br" {
		return fmt.Errorf("filename must have a .br extension")
	}
	return nil
}
// String returns the display name of this format.
func (br *Brotli) String() string { return "brotli" }

// NewBrotli returns a new, default instance ready to be customized and used.
func NewBrotli() *Brotli {
	return &Brotli{
		Quality: brotli.DefaultCompression,
	}
}

// Compile-time checks to ensure type implements desired interfaces.
var (
	_ = Compressor(new(Brotli))
	_ = Decompressor(new(Brotli))
)

// DefaultBrotli is a default instance that is conveniently ready to use.
var DefaultBrotli = NewBrotli()

13
build.bash Executable file
View file

@ -0,0 +1,13 @@
#!/usr/bin/env bash
set -ex

# This script builds archiver for most common platforms.

# Pure-Go static binaries: disable cgo for all targets.
export CGO_ENABLED=0

cd cmd/arc
GOOS=linux GOARCH=amd64 go build -o ../../builds/arc_linux_amd64
# The output name promises ARMv7, so pin GOARM=7 explicitly
# (the toolchain's default ARM revision may differ).
GOOS=linux GOARCH=arm GOARM=7 go build -o ../../builds/arc_linux_arm7
GOOS=darwin GOARCH=amd64 go build -o ../../builds/arc_mac_amd64
GOOS=windows GOARCH=amd64 go build -o ../../builds/arc_windows_amd64.exe
cd ../..

64
bz2.go Normal file
View file

@ -0,0 +1,64 @@
package archiver
import (
"fmt"
"io"
"path/filepath"
"github.com/dsnet/compress/bzip2"
)
// Bz2 facilitates bzip2 compression.
type Bz2 struct {
	// CompressionLevel is passed through to
	// bzip2.WriterConfig.Level; NewBz2 initializes it to the
	// library's default.
	CompressionLevel int
}
// Compress reads in, compresses it, and writes it to out.
func (bz *Bz2) Compress(in io.Reader, out io.Writer) error {
	w, err := bzip2.NewWriter(out, &bzip2.WriterConfig{
		Level: bz.CompressionLevel,
	})
	if err != nil {
		return err
	}
	if _, err = io.Copy(w, in); err != nil {
		w.Close() // best effort; the copy error takes precedence
		return err
	}
	// Close flushes buffered data; return its error instead of
	// discarding it as the previous deferred Close did, so a
	// truncated stream is not reported as success.
	return w.Close()
}
// Decompress reads in, decompresses it, and writes it to out.
func (bz *Bz2) Decompress(in io.Reader, out io.Writer) error {
	src, err := bzip2.NewReader(in, nil)
	if err != nil {
		return err
	}
	defer src.Close()
	if _, err = io.Copy(out, src); err != nil {
		return err
	}
	return nil
}
// CheckExt ensures the file extension matches the format.
func (bz *Bz2) CheckExt(filename string) error {
	if ext := filepath.Ext(filename); ext != ".bz2" {
		return fmt.Errorf("filename must have a .bz2 extension")
	}
	return nil
}
// String returns the display name of this format.
func (bz *Bz2) String() string { return "bz2" }

// NewBz2 returns a new, default instance ready to be customized and used.
func NewBz2() *Bz2 {
	return &Bz2{
		CompressionLevel: bzip2.DefaultCompression,
	}
}

// Compile-time checks to ensure type implements desired interfaces.
var (
	_ = Compressor(new(Bz2))
	_ = Decompressor(new(Bz2))
)

// DefaultBz2 is a default instance that is conveniently ready to use.
var DefaultBz2 = NewBz2()

376
cmd/arc/main.go Normal file
View file

@ -0,0 +1,376 @@
package main
import (
"archive/tar"
"bytes"
"compress/flate"
"flag"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/klauspost/compress/zip"
"github.com/mholt/archiver/v3"
"github.com/nwaples/rardecode"
)
// Command-line options shared by all subcommands; bound to flags in
// init and read after flag.Parse in main.
var (
	compressionLevel       int
	overwriteExisting      bool
	mkdirAll               bool
	selectiveCompression   bool
	implicitTopLevelFolder bool
	stripComponents        int
	continueOnError        bool
	specifyFileType        string
)

// Build metadata for the version banner.
// NOTE(review): presumably injected at link time via -ldflags by the
// release tooling — confirm.
var (
	version string
	commit  string
	date    string
)

// init registers the global flags before main calls flag.Parse.
func init() {
	flag.IntVar(&compressionLevel, "level", flate.DefaultCompression, "Compression level")
	flag.BoolVar(&overwriteExisting, "overwrite", false, "Overwrite existing files")
	flag.BoolVar(&mkdirAll, "mkdirs", false, "Make all necessary directories")
	flag.BoolVar(&selectiveCompression, "smart", true, "Only compress files which are not already compressed (zip only)")
	flag.BoolVar(&implicitTopLevelFolder, "folder-safe", true, "If an archive does not have a single top-level folder, create one implicitly")
	flag.IntVar(&stripComponents, "strip-components", 0, "Strip number of leading paths")
	flag.BoolVar(&continueOnError, "allow-errors", true, "Log errors and continue processing")
	flag.StringVar(&specifyFileType, "ext", "", "specify file type")
}
// main dispatches the arc subcommands (archive, unarchive, extract,
// ls, compress, decompress) after resolving the archive format from
// the relevant filename argument.
func main() {
	// handle help/version before requiring the usual argument count
	if len(os.Args) >= 2 &&
		(os.Args[1] == "-h" || os.Args[1] == "--help" || os.Args[1] == "help") {
		fmt.Println(usageString())
		os.Exit(0)
	}
	if len(os.Args) >= 2 &&
		(os.Args[1] == "-V" || os.Args[1] == "--version" || os.Args[1] == "version") {
		fmt.Printf("arc v%s %s (%s)", version, commit, date)
		os.Exit(0)
	}
	if len(os.Args) < 3 {
		fatal(usageString())
	}
	flag.Parse()
	subcommand := flag.Arg(0)
	// get the format we're working with
	iface, err := getFormat(subcommand)
	if err != nil {
		fatal(err)
	}
	// run the desired command
	switch subcommand {
	case "archive":
		a, ok := iface.(archiver.Archiver)
		if !ok {
			fatalf("the archive command does not support the %s format", iface)
		}
		// expand each source argument as a glob pattern
		var sources []string
		for _, src := range flag.Args()[2:] {
			srcs, err := filepath.Glob(src)
			if err != nil {
				// FIX: was fatalf(err.Error()), which treated the
				// error text as a format string and mangled any '%'
				fatal(err)
			}
			sources = append(sources, srcs...)
		}
		err = a.Archive(sources, flag.Arg(1))
	case "unarchive":
		u, ok := iface.(archiver.Unarchiver)
		if !ok {
			fatalf("the unarchive command does not support the %s format", iface)
		}
		err = u.Unarchive(flag.Arg(1), flag.Arg(2))
	case "extract":
		e, ok := iface.(archiver.Extractor)
		if !ok {
			fatalf("the extract command does not support the %s format", iface)
		}
		err = e.Extract(flag.Arg(1), flag.Arg(2), flag.Arg(3))
	case "ls":
		w, ok := iface.(archiver.Walker)
		if !ok {
			fatalf("the ls command does not support the %s format", iface)
		}
		var count int
		err = w.Walk(flag.Arg(1), func(f archiver.File) error {
			count++
			// print a format-appropriate listing line per entry
			switch h := f.Header.(type) {
			case zip.FileHeader:
				fmt.Printf("%s\t%d\t%d\t%s\t%s\n",
					f.Mode(),
					h.Method,
					f.Size(),
					f.ModTime(),
					h.Name,
				)
			case *tar.Header:
				fmt.Printf("%s\t%s\t%s\t%d\t%s\t%s\n",
					f.Mode(),
					h.Uname,
					h.Gname,
					f.Size(),
					f.ModTime(),
					h.Name,
				)
			case *rardecode.FileHeader:
				fmt.Printf("%s\t%d\t%d\t%s\t%s\n",
					f.Mode(),
					int(h.HostOS),
					f.Size(),
					f.ModTime(),
					h.Name,
				)
			default:
				fmt.Printf("%s\t%d\t%s\t?/%s\n",
					f.Mode(),
					f.Size(),
					f.ModTime(),
					f.Name(), // we don't know full path from this
				)
			}
			return nil
		})
		fmt.Printf("total %d\n", count)
	case "compress":
		c, ok := iface.(archiver.Compressor)
		if !ok {
			fatalf("the compress command does not support the %s format", iface)
		}
		fc := archiver.FileCompressor{Compressor: c}
		in := flag.Arg(1)
		out := flag.Arg(2)
		// if only a format was given as destination, compress in
		// place (append extension, then delete the source)
		var deleteWhenDone bool
		if cs, ok := c.(fmt.Stringer); ok && out == cs.String() {
			out = in + "." + out
			deleteWhenDone = true
		}
		err = fc.CompressFile(in, out)
		if err == nil && deleteWhenDone {
			err = os.Remove(in)
		}
	case "decompress":
		c, ok := iface.(archiver.Decompressor)
		if !ok {
			// FIX: message previously said "compress" (copy-paste)
			fatalf("the decompress command does not support the %s format", iface)
		}
		fc := archiver.FileCompressor{Decompressor: c}
		in := flag.Arg(1)
		out := flag.Arg(2)
		// with no destination, decompress in place (strip the
		// extension, then delete the source)
		var deleteWhenDone bool
		if cs, ok := c.(fmt.Stringer); ok && out == "" {
			out = strings.TrimSuffix(in, "."+cs.String())
			deleteWhenDone = true
		}
		err = fc.DecompressFile(in, out)
		if err == nil && deleteWhenDone {
			err = os.Remove(in)
		}
	default:
		fatalf("unrecognized command: %s", flag.Arg(0))
	}
	if err != nil {
		fatal(err)
	}
}
// getFormat resolves and fully configures the format value the
// subcommand will operate on, chosen by filename extension (or the
// -ext override).
func getFormat(subcommand string) (interface{}, error) {
	// prepare the filename, with which we will find a suitable format
	formatPos := 1
	if subcommand == "compress" {
		formatPos = 2
	}
	filename := flag.Arg(formatPos)
	if subcommand == "compress" && !strings.Contains(filename, ".") {
		filename = "." + filename // leading dot needed for extension matching
	}
	// get the format by filename extension
	if specifyFileType != "" {
		filename = "." + specifyFileType
	}
	f, err := archiver.ByExtension(filename)
	if err != nil {
		return nil, err
	}
	// prepare a single Tar, in case it's needed
	mytar := &archiver.Tar{
		OverwriteExisting:      overwriteExisting,
		MkdirAll:               mkdirAll,
		ImplicitTopLevelFolder: implicitTopLevelFolder,
		StripComponents:        stripComponents,
		ContinueOnError:        continueOnError,
	}
	// fully configure the new value
	switch v := f.(type) {
	case *archiver.Rar:
		v.OverwriteExisting = overwriteExisting
		v.MkdirAll = mkdirAll
		v.ImplicitTopLevelFolder = implicitTopLevelFolder
		v.StripComponents = stripComponents
		v.ContinueOnError = continueOnError
		v.Password = os.Getenv("ARCHIVE_PASSWORD")
	case *archiver.Tar:
		// replace wholesale with the pre-configured Tar (v unused)
		f = mytar
	case *archiver.TarBrotli:
		v.Tar = mytar
		v.Quality = compressionLevel
	case *archiver.TarBz2:
		v.Tar = mytar
		v.CompressionLevel = compressionLevel
	case *archiver.TarGz:
		v.Tar = mytar
		v.CompressionLevel = compressionLevel
	case *archiver.TarLz4:
		v.Tar = mytar
		v.CompressionLevel = compressionLevel
	case *archiver.TarSz:
		v.Tar = mytar
	case *archiver.TarXz:
		v.Tar = mytar
	case *archiver.TarZstd:
		v.Tar = mytar
	case *archiver.Zip:
		v.CompressionLevel = compressionLevel
		v.OverwriteExisting = overwriteExisting
		v.MkdirAll = mkdirAll
		v.SelectiveCompression = selectiveCompression
		v.ImplicitTopLevelFolder = implicitTopLevelFolder
		v.StripComponents = stripComponents
		v.ContinueOnError = continueOnError
	case *archiver.Gz:
		v.CompressionLevel = compressionLevel
	case *archiver.Brotli:
		v.Quality = compressionLevel
	case *archiver.Bz2:
		v.CompressionLevel = compressionLevel
	case *archiver.Lz4:
		v.CompressionLevel = compressionLevel
	case *archiver.Snappy:
		// nothing to customize
	case *archiver.Xz:
		// nothing to customize
	case *archiver.Zstd:
		// nothing to customize
	default:
		return nil, fmt.Errorf("format does not support customization: %s", f)
	}
	return f, nil
}
// fatal prints its arguments to stderr and exits with status 1.
func fatal(v ...interface{}) {
	fmt.Fprintln(os.Stderr, v...)
	os.Exit(1)
}

// fatalf prints a formatted message (plus newline) to stderr and
// exits with status 1.
func fatalf(s string, v ...interface{}) {
	fmt.Fprintf(os.Stderr, s+"\n", v...)
	os.Exit(1)
}
// usageString renders the static usage text followed by the defaults
// of every flag registered on the default FlagSet.
func usageString() string {
	var b bytes.Buffer
	b.WriteString(usage)
	flag.CommandLine.SetOutput(&b)
	flag.CommandLine.PrintDefaults()
	return b.String()
}
// usage is the static help text; usageString appends the flag
// defaults to it.
const usage = `Usage: arc {archive|unarchive|extract|ls|compress|decompress|help} [arguments...]
archive
Create a new archive file. List the files/folders
to include in the archive; at least one required.
unarchive
Extract an archive file. Provide the archive to
open and the destination folder to extract into.
extract
Extract a single file or folder (recursively) from
an archive. First argument is the source archive,
second is the file to extract (exact path within the
archive is required), and third is destination.
ls
List the contents of the archive.
compress
Compresses a file, destination optional.
decompress
Decompresses a file, destination optional.
help
Display this help text. Also -h or --help.
SPECIFYING THE ARCHIVE FORMAT
The format of the archive is determined by its
file extension*. Supported extensions:
.zip
.tar
.tar.br
.tbr
.tar.gz
.tgz
.tar.bz2
.tbz2
.tar.xz
.txz
.tar.lz4
.tlz4
.tar.sz
.tsz
.zst
.tar.zst
.rar (open only)
.bz2
.gz
.lz4
.sz
.xz
*use flag --ext to manually set filetype. example: --ext=tar.gz
(DE)COMPRESSING SINGLE FILES
Some formats are compression-only, and can be used
with the compress and decompress commands on a
single file; they do not bundle multiple files.
To replace a file when compressing, specify the
source file name for the first argument, and the
compression format (without leading dot) for the
second argument. To replace a file when decompressing,
specify only the source file and no destination.
PASSWORD-PROTECTED RAR FILES
Export the ARCHIVE_PASSWORD environment variable
to be able to open password-protected rar archives.
GLOBAL FLAG REFERENCE
The following global flags may be used before the
sub-command (some flags are format-specific):
`

260
doc_test.go Normal file
View file

@ -0,0 +1,260 @@
package archiver
import (
"fmt"
"io"
"log"
"net/http"
"os"
"strconv"
)
// The simplest use of this package: create an archive file
// from a list of filenames. This is the recommended way to
// do so using a default configuration, as it guarantees
// the file format matches the file extension, because the
// format to write is determined by the given extension.
func ExampleArchive() {
	// any files in this list are added
	// to the top level of the archive;
	// directories are recursively added
	files := []string{
		"index.html",
		"photo.jpg",
		"blog", // directory
		"/home/website/copyright.txt",
	}
	// archive format is determined by file extension
	err := Archive(files, "blog_site.zip")
	if err != nil {
		log.Fatal(err)
	}
}

// The simplest use of this package: extract all of an archive's
// contents to a folder on disk using the default configuration.
// The archive format is determined automatically.
func ExampleUnarchive() {
	err := Unarchive("blog_site.zip", "extracted/mysite")
	if err != nil {
		log.Fatal(err)
	}
}

// In this example, the DefaultZip is being customized so that
// all calls to its methods will use that configuration.
func ExampleZip_default() {
	DefaultZip.OverwriteExisting = true
	DefaultZip.ImplicitTopLevelFolder = true
	// any subsequent use of DefaultZip uses
	// this modified configuration
}

// Here we create our own instance of the Zip format. No need
// to use the constructor function (NewZip) or the default
// instance (DefaultZip) if we do not want to. Instantiating
// the type like this allows us to easily be very explicit
// about our configuration.
func ExampleZip_custom() {
	z := &Zip{
		CompressionLevel:       3,
		OverwriteExisting:      false,
		MkdirAll:               true,
		SelectiveCompression:   true,
		ImplicitTopLevelFolder: true,
		ContinueOnError:        false,
	}
	// z is now ready to use for whatever (this is a dumb example)
	fmt.Println(z.CheckExt("test.zip"))
}

// Much like the package-level Archive function, this creates an
// archive using the configuration of the Zip instance it is called
// on. The output filename must match the format's recognized file
// extension(s).
func ExampleZip_Archive() {
	err := DefaultZip.Archive([]string{"..."}, "example.zip")
	if err != nil {
		log.Fatal(err)
	}
}

// It's easy to list the items in an archive. This example
// prints the name and size of each file in the archive. Like
// other top-level functions in this package, the format is
// inferred automatically for you.
func ExampleWalk() {
	err := Walk("example.tar.gz", func(f File) error {
		fmt.Println(f.Name(), f.Size())
		// you could also read the contents; f is an io.Reader!
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}

// This example extracts target.txt from inside example.rar
// and puts it into a folder on disk called output/dir.
func ExampleExtract() {
	err := Extract("example.rar", "target.txt", "output/dir")
	if err != nil {
		log.Fatal(err)
	}
}
// This example demonstrates how to read an
// archive in a streaming fashion. The idea
// is that you can stream the bytes of an
// archive from a stream, regardless of
// whether it is an actual file on disk.
// This means that you can read a huge
// archive file-by-file rather than having
// to store it all on disk first. In this
// example, we read a hypothetical archive
// from a (fake) HTTP request body and
// print its file names and sizes. The
// files can be read, of course, but they
// do not have to be.
func ExampleZip_streamingRead() {
	// for the sake of the example compiling, pretend we have an HTTP request
	req := new(http.Request)
	contentLen, err := strconv.Atoi(req.Header.Get("Content-Length"))
	if err != nil {
		log.Fatal(err)
	}
	// the Zip format requires knowing the length of the stream,
	// but other formats don't generally require it, so it
	// could be left as 0 when using those
	err = DefaultZip.Open(req.Body, int64(contentLen))
	if err != nil {
		log.Fatal(err)
	}
	defer DefaultZip.Close()
	// Note that DefaultZip now contains some state that
	// is critical to reading the stream until it is closed,
	// so do not reuse it until then.
	// iterate each file in the archive until EOF
	for {
		f, err := DefaultZip.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		// f is an io.ReadCloser, so you can read its contents
		// if you wish; or you can access its header info through
		// f.Header or the embedded os.FileInfo
		fmt.Println("File name:", f.Name(), "File size:", f.Size())
		// be sure to close f before moving on!!
		err = f.Close()
		if err != nil {
			log.Fatal(err)
		}
	}
}
// This example demonstrates how to write an
// archive in a streaming fashion. The idea
// is that you can stream the bytes of a new
// archive that is created on-the-fly from
// generic streams. Those streams could be
// actual files on disk, or they could be over
// a network, or standard output, or any other
// io.Reader/io.Writer. This example only adds
// one file to the archive and writes the
// resulting archive to standard output, but you
// could add as many files as needed with a loop.
func ExampleZip_streamingWrite() {
	err := DefaultZip.Create(os.Stdout)
	if err != nil {
		log.Fatal(err)
	}
	defer DefaultZip.Close()

	// Note that DefaultZip now contains state
	// critical to a successful write until it
	// is closed, so don't reuse it for anything
	// else until then.

	// At this point, you can open an actual file
	// to add to the archive, or the "file" could
	// come from any io.ReadCloser stream. If you
	// only have an io.Reader, you can use
	// ReadFakeCloser to make it into an
	// io.ReadCloser.

	// The next part is a little tricky if you
	// don't have an actual file because you will
	// need an os.FileInfo. Fortunately, that's an
	// interface! So go ahead and implement it in
	// whatever way makes the most sense to you.
	// You'll also need to give the file a name
	// for within the archive. In this example,
	// we'll open a real file.
	file, err := os.Open("foo.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()
	fileInfo, err := file.Stat()
	if err != nil {
		log.Fatal(err)
	}

	// CustomName overrides the on-disk name inside the archive.
	err = DefaultZip.Write(File{
		FileInfo: FileInfo{
			FileInfo:   fileInfo,
			CustomName: "name/in/archive.txt",
		},
		ReadCloser: file, // does not have to be an actual file
	})
	if err != nil {
		log.Fatal(err)
	}
}
// This example compresses a standard tar file into a tar.gz file.
// Compression formats are selected by file extension.
func ExampleCompressFile() {
	// destination extension (.gz) selects the gzip compressor
	err := CompressFile("example.tar", "example.tar.gz")
	if err != nil {
		log.Fatal(err)
	}
}
// This example changes the default configuration for
// the Gz compression format.
// NOTE: DefaultGz is shared package-level state.
func ExampleCompressFile_custom() {
	DefaultGz.CompressionLevel = 5
	// any calls to DefaultGz now use the modified configuration
}
// This example creates a new Gz instance and
// uses it to compress a stream, writing to
// another stream. This is sometimes preferable
// over modifying the DefaultGz.
func ExampleGz_Compress_custom() {
	gz := &Gz{CompressionLevel: 5}
	// reads stdin until EOF, writes compressed bytes to stdout
	err := gz.Compress(os.Stdin, os.Stdout)
	if err != nil {
		log.Fatal(err)
	}
}
// This example decompresses a gzipped tarball and writes
// it to an adjacent file. The source extension selects
// the decompressor.
func ExampleDecompressFile() {
	err := DecompressFile("example.tar.gz", "example.tar")
	if err != nil {
		log.Fatal(err)
	}
}

27
error.go Normal file
View file

@ -0,0 +1,27 @@
package archiver
import (
	"errors"
	"fmt"
	"strings"
)
// IllegalPathError is an error returned when an illegal
// path is detected during the archival process.
//
// By default, only the Filename is showed on error, but you might
// also get the absolute value of the invalid path on the AbsolutePath
// field.
type IllegalPathError struct {
	// AbsolutePath is the resolved destination the illegal
	// entry would have escaped to (may be empty).
	AbsolutePath string
	// Filename is the offending name as found in the archive.
	Filename string
}

// Error implements the error interface.
func (err *IllegalPathError) Error() string {
	return fmt.Sprintf("illegal file path: %s", err.Filename)
}

// IsIllegalPathError returns true if the provided error is of
// the type IllegalPathError.
func IsIllegalPathError(err error) bool {
	if err == nil {
		return false
	}
	// Prefer a typed match so that *IllegalPathError values wrapped
	// with %w are detected regardless of message formatting.
	var ipe *IllegalPathError
	if errors.As(err, &ipe) {
		return true
	}
	// Fall back to substring matching for errors wrapped with %v
	// (which loses the concrete type); this preserves the original
	// behavior of this function.
	return strings.Contains(err.Error(), "illegal file path: ")
}

54
error_test.go Normal file
View file

@ -0,0 +1,54 @@
package archiver_test
import (
"errors"
"fmt"
"os"
"testing"
"github.com/mholt/archiver/v3"
)
// TestIllegalPathErrorString verifies the message produced by
// IllegalPathError.Error (only Filename appears in the message).
func TestIllegalPathErrorString(t *testing.T) {
	tests := []struct {
		instance *archiver.IllegalPathError
		expected string
	}{
		{instance: &archiver.IllegalPathError{Filename: "foo.txt"}, expected: "illegal file path: foo.txt"},
		{instance: &archiver.IllegalPathError{AbsolutePath: "/tmp/bar.txt", Filename: "bar.txt"}, expected: "illegal file path: bar.txt"},
	}

	for i, test := range tests {
		test := test // capture range variable for the subtest closure
		t.Run(fmt.Sprintf("Case %d", i), func(t *testing.T) {
			if test.expected != test.instance.Error() {
				// fixed typo: was "Excepected"
				t.Fatalf("Expected '%s', but got '%s'", test.expected, test.instance.Error())
			}
		})
	}
}
// TestIsIllegalPathError verifies that only *IllegalPathError values
// (not arbitrary errors or nil) are recognized by IsIllegalPathError.
func TestIsIllegalPathError(t *testing.T) {
	tests := []struct {
		instance error
		expected bool
	}{
		{instance: nil, expected: false},
		{instance: os.ErrNotExist, expected: false},
		{instance: fmt.Errorf("some error"), expected: false},
		{instance: errors.New("another error"), expected: false},
		{instance: &archiver.IllegalPathError{Filename: "foo.txt"}, expected: true},
	}

	for i, test := range tests {
		test := test // capture range variable for the subtest closure
		t.Run(fmt.Sprintf("Case %d", i), func(t *testing.T) {
			actual := archiver.IsIllegalPathError(test.instance)
			if actual != test.expected {
				// fixed typo: was "Excepected"
				t.Fatalf("Expected '%v', but got '%v'", test.expected, actual)
			}
		})
	}
}

67
filecompressor.go Normal file
View file

@ -0,0 +1,67 @@
package archiver
import (
"fmt"
"os"
)
// FileCompressor can compress and decompress single files.
type FileCompressor struct {
	// Compressor performs the compression (e.g. *Gz, *Lz4).
	Compressor
	// Decompressor performs the decompression.
	Decompressor

	// Whether to overwrite existing files when creating files.
	OverwriteExisting bool
}
// CompressFile reads the source file and compresses it to destination.
// The destination must have a matching extension.
func (fc FileCompressor) CompressFile(source, destination string) error {
	if err := fc.CheckExt(destination); err != nil {
		return err
	}
	if fc.Compressor == nil {
		return fmt.Errorf("no compressor specified")
	}
	if !fc.OverwriteExisting && fileExists(destination) {
		return fmt.Errorf("file exists: %s", destination)
	}

	in, err := os.Open(source)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.Create(destination)
	if err != nil {
		return err
	}

	// Close the destination explicitly and surface its error: a
	// deferred Close would silently swallow a failed flush, leaving
	// a truncated file with a nil error returned to the caller.
	err = fc.Compress(in, out)
	if cerr := out.Close(); err == nil {
		err = cerr
	}
	return err
}
// DecompressFile reads the source file and decompresses it to destination.
func (fc FileCompressor) DecompressFile(source, destination string) error {
	if fc.Decompressor == nil {
		return fmt.Errorf("no decompressor specified")
	}
	if !fc.OverwriteExisting && fileExists(destination) {
		return fmt.Errorf("file exists: %s", destination)
	}

	in, err := os.Open(source)
	if err != nil {
		return err
	}
	defer in.Close()

	out, err := os.Create(destination)
	if err != nil {
		return err
	}

	// Close the destination explicitly and surface its error rather
	// than dropping it in a deferred Close (see CompressFile).
	err = fc.Decompress(in, out)
	if cerr := out.Close(); err == nil {
		err = cerr
	}
	return err
}

122
filecompressor_test.go Normal file
View file

@ -0,0 +1,122 @@
package archiver
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"testing"
)
// TestCheckExtension exercises CheckExt for every format against a
// table of matching and non-matching extensions, then verifies that
// the file-creating entry points (Archive, CompressFile) also reject
// mismatched destination extensions.
func TestCheckExtension(t *testing.T) {
	testdir, err := ioutil.TempDir("", "archiver_checkext_test_")
	if err != nil {
		t.Fatalf("Making temporary directory: %v", err)
	}
	defer os.RemoveAll(testdir)
	testfile, err := ioutil.TempFile(testdir, "compressor_test_input_*.txt")
	if err != nil {
		t.Fatalf("Making temporary file: %v", err)
	}
	defer os.Remove(testfile.Name())
	defer testfile.Close()

	for i, tc := range []struct {
		checker   ExtensionChecker
		ext       string // including leading dot
		shouldErr bool
	}{
		{checker: NewBz2(), ext: ".bz2", shouldErr: false},
		{checker: NewBz2(), ext: ".gz", shouldErr: true},

		{checker: NewGz(), ext: ".gz", shouldErr: false},
		{checker: NewGz(), ext: ".sz", shouldErr: true},

		{checker: NewLz4(), ext: ".lz4", shouldErr: false},
		{checker: NewLz4(), ext: ".xz", shouldErr: true},

		{checker: NewSnappy(), ext: ".sz", shouldErr: false},
		{checker: NewSnappy(), ext: ".lz4", shouldErr: true},

		{checker: NewXz(), ext: ".xz", shouldErr: false},
		{checker: NewXz(), ext: ".bz2", shouldErr: true},

		{checker: NewZip(), ext: ".zip", shouldErr: false},
		{checker: NewZip(), ext: ".zip.gz", shouldErr: true},
		{checker: NewZip(), ext: ".tgz", shouldErr: true},
		{checker: NewZip(), ext: ".gz", shouldErr: true},

		{checker: NewTar(), ext: ".tar", shouldErr: false},
		{checker: NewTar(), ext: ".zip", shouldErr: true},
		{checker: NewTar(), ext: ".tar.gz", shouldErr: true},
		{checker: NewTar(), ext: ".tgz", shouldErr: true},

		{checker: NewTarBz2(), ext: ".tar.bz2", shouldErr: false},
		{checker: NewTarBz2(), ext: ".tbz2", shouldErr: false},
		{checker: NewTarBz2(), ext: ".zip", shouldErr: true},
		{checker: NewTarBz2(), ext: ".tar", shouldErr: true},
		{checker: NewTarBz2(), ext: ".bz2", shouldErr: true},

		{checker: NewTarGz(), ext: ".tar.gz", shouldErr: false},
		{checker: NewTarGz(), ext: ".tgz", shouldErr: false},
		{checker: NewTarGz(), ext: ".zip", shouldErr: true},
		{checker: NewTarGz(), ext: ".tar", shouldErr: true},
		{checker: NewTarGz(), ext: ".gz", shouldErr: true},

		{checker: NewTarLz4(), ext: ".tar.lz4", shouldErr: false},
		{checker: NewTarLz4(), ext: ".tlz4", shouldErr: false},
		{checker: NewTarLz4(), ext: ".zip", shouldErr: true},
		{checker: NewTarLz4(), ext: ".tar", shouldErr: true},
		{checker: NewTarLz4(), ext: ".lz4", shouldErr: true},

		{checker: NewTarSz(), ext: ".tar.sz", shouldErr: false},
		{checker: NewTarSz(), ext: ".tsz", shouldErr: false},
		{checker: NewTarSz(), ext: ".zip", shouldErr: true},
		{checker: NewTarSz(), ext: ".tar", shouldErr: true},
		{checker: NewTarSz(), ext: ".sz", shouldErr: true},

		{checker: NewTarXz(), ext: ".tar.xz", shouldErr: false},
		{checker: NewTarXz(), ext: ".txz", shouldErr: false},
		{checker: NewTarXz(), ext: ".zip", shouldErr: true},
		{checker: NewTarXz(), ext: ".tar", shouldErr: true},
		{checker: NewTarXz(), ext: ".xz", shouldErr: true},
	} {
		err := tc.checker.CheckExt("test" + tc.ext)
		if tc.shouldErr && err == nil {
			t.Errorf("Test %d [%s - %s]: Expected an error when checking extension, but got none",
				i, tc.checker, tc.ext)
		}
		if !tc.shouldErr && err != nil {
			t.Errorf("Test %d [%s - %s]: Did not expect an error when checking extension, but got: %v",
				i, tc.checker, tc.ext, err)
		}

		// also ensure that methods which create files check the extension,
		// to avoid confusion where the extension indicates one format but
		// actual format is another
		if a, ok := tc.checker.(Archiver); ok {
			filename := fmt.Sprintf("test%d_archive%s", i, tc.ext)
			err := a.Archive(nil, filepath.Join(testdir, filename))
			if tc.shouldErr && err == nil {
				t.Errorf("Test %d [%s - %s]: Archive(): Expected an error with filename '%s' but got none",
					i, tc.checker, tc.ext, filename)
			}
			if !tc.shouldErr && err != nil {
				t.Errorf("Test %d [%s - %s]: Archive(): Did not expect an error with filename '%s', but got: %v",
					i, tc.checker, tc.ext, filename, err)
			}
		}
		// NOTE(review): the NewXxx constructors appear to return pointer
		// compressor types (e.g. *Gz), not FileCompressor values, so this
		// assertion may never succeed and the branch may be dead — verify.
		if c, ok := tc.checker.(FileCompressor); ok {
			filename := fmt.Sprintf("test%d_compress%s", i, tc.ext)
			err := c.CompressFile(testfile.Name(), filepath.Join(testdir, filename))
			if tc.shouldErr && err == nil {
				t.Errorf("Test %d [%s - %s]: Compress(): Expected an error with filename '%s' but got none",
					i, tc.checker, tc.ext, filename)
			}
			if !tc.shouldErr && err != nil {
				t.Errorf("Test %d [%s - %s]: Compress(): Did not expect an error with filename '%s', but got: %v",
					i, tc.checker, tc.ext, filename, err)
			}
		}
	}
}

15
go.mod Normal file
View file

@ -0,0 +1,15 @@
module github.com/mholt/archiver/v3
go 1.13
require (
github.com/andybalholm/brotli v1.0.1
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5
github.com/golang/snappy v0.0.2
github.com/klauspost/compress v1.11.4
github.com/klauspost/pgzip v1.2.5
github.com/nwaples/rardecode v1.1.0
github.com/pierrec/lz4/v4 v4.1.2
github.com/ulikunitz/xz v0.5.9
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8
)

25
go.sum Normal file
View file

@ -0,0 +1,25 @@
github.com/andybalholm/brotli v1.0.1 h1:KqhlKozYbRtJvsPrrEeXcO+N2l6NYT5A2QAFmSULpEc=
github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY=
github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
github.com/golang/snappy v0.0.2 h1:aeE13tS0IiQgFjYdoL8qN3K1N2bXXtI6Vi51/y7BpMw=
github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.11.4 h1:kz40R/YWls3iqT9zX9AHN3WoVsrAWVyui5sxuLqiXqU=
github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM=
github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I=
github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

76
gz.go Normal file
View file

@ -0,0 +1,76 @@
package archiver
import (
"fmt"
"io"
"path/filepath"
"github.com/klauspost/compress/gzip"
"github.com/klauspost/pgzip"
)
// Gz facilitates gzip compression.
type Gz struct {
	// CompressionLevel is passed to the underlying gzip writer.
	CompressionLevel int
	// SingleThreaded selects compress/gzip instead of the
	// parallel pgzip implementation.
	SingleThreaded bool
}
// Compress reads in, compresses it, and writes it to out.
func (gz *Gz) Compress(in io.Reader, out io.Writer) error {
	var w io.WriteCloser
	var err error
	if gz.SingleThreaded {
		w, err = gzip.NewWriterLevel(out, gz.CompressionLevel)
	} else {
		// pgzip compresses with multiple goroutines
		w, err = pgzip.NewWriterLevel(out, gz.CompressionLevel)
	}
	if err != nil {
		return err
	}
	if _, err = io.Copy(w, in); err != nil {
		w.Close()
		return err
	}
	// Close flushes buffered data and writes the gzip footer; its
	// error must be returned or the output may be silently truncated
	// (the previous deferred Close discarded it).
	return w.Close()
}
// Decompress reads in, decompresses it, and writes it to out.
func (gz *Gz) Decompress(in io.Reader, out io.Writer) error {
	var (
		r   io.ReadCloser
		err error
	)
	// choose the single-threaded stdlib reader or the parallel
	// pgzip reader according to configuration
	if gz.SingleThreaded {
		r, err = gzip.NewReader(in)
	} else {
		r, err = pgzip.NewReader(in)
	}
	if err != nil {
		return err
	}
	defer r.Close()
	_, err = io.Copy(out, r)
	return err
}
// CheckExt ensures the file extension matches the format.
func (gz *Gz) CheckExt(filename string) error {
	if filepath.Ext(filename) == ".gz" {
		return nil
	}
	return fmt.Errorf("filename must have a .gz extension")
}
// String returns the name of this format: "gz".
func (gz *Gz) String() string { return "gz" }
// NewGz returns a new, default instance ready to be customized and used.
func NewGz() *Gz {
	gz := new(Gz)
	gz.CompressionLevel = gzip.DefaultCompression
	return gz
}
// Compile-time checks to ensure type implements desired interfaces.
// (These assignments fail to compile if *Gz stops satisfying them.)
var (
	_ = Compressor(new(Gz))
	_ = Decompressor(new(Gz))
)

// DefaultGz is a default instance that is conveniently ready to use.
var DefaultGz = NewGz()

63
lz4.go Normal file
View file

@ -0,0 +1,63 @@
package archiver
import (
"fmt"
"io"
"path/filepath"
"github.com/pierrec/lz4/v4"
)
// Lz4 facilitates LZ4 compression.
type Lz4 struct {
	// CompressionLevel is translated to an lz4.CompressionLevel
	// via bit-shifting in Compress (see TODO there).
	CompressionLevel int
}
// Compress reads in, compresses it, and writes it to out.
func (lz *Lz4) Compress(in io.Reader, out io.Writer) error {
	w := lz4.NewWriter(out)
	// TODO archiver v4: use proper lz4.Fast
	// bitshifting for backwards compatibility with lz4/v3
	options := []lz4.Option{
		lz4.CompressionLevelOption(lz4.CompressionLevel(1 << (8 + lz.CompressionLevel))),
	}
	if err := w.Apply(options...); err != nil {
		return err
	}
	if _, err := io.Copy(w, in); err != nil {
		w.Close()
		return err
	}
	// Close flushes any buffered frames; its error must be
	// propagated or the stream may be silently truncated
	// (the previous deferred Close discarded it).
	return w.Close()
}
// Decompress reads in, decompresses it, and writes it to out.
func (lz *Lz4) Decompress(in io.Reader, out io.Writer) error {
	_, err := io.Copy(out, lz4.NewReader(in))
	return err
}
// CheckExt ensures the file extension matches the format.
func (lz *Lz4) CheckExt(filename string) error {
	if filepath.Ext(filename) == ".lz4" {
		return nil
	}
	return fmt.Errorf("filename must have a .lz4 extension")
}
// String returns the name of this format: "lz4".
func (lz *Lz4) String() string { return "lz4" }
// NewLz4 returns a new, default instance ready to be customized and used.
func NewLz4() *Lz4 {
	lz := new(Lz4)
	// https://github.com/lz4/lz4/blob/1b819bfd633ae285df2dfe1b0589e1ec064f2873/lib/lz4hc.h#L48
	lz.CompressionLevel = 9
	return lz
}
// Compile-time checks to ensure type implements desired interfaces.
// (These assignments fail to compile if *Lz4 stops satisfying them.)
var (
	_ = Compressor(new(Lz4))
	_ = Decompressor(new(Lz4))
)

// DefaultLz4 is a default instance that is conveniently ready to use.
var DefaultLz4 = NewLz4()

446
rar.go Normal file
View file

@ -0,0 +1,446 @@
package archiver
import (
"bytes"
"fmt"
"io"
"log"
"os"
"path"
"path/filepath"
"strings"
"time"
"github.com/nwaples/rardecode"
)
// Rar provides facilities for reading RAR archives.
// See https://www.rarlab.com/technote.htm.
type Rar struct {
	// Whether to overwrite existing files; if false,
	// an error is returned if the file exists.
	OverwriteExisting bool

	// Whether to make all the directories necessary
	// to create a rar archive in the desired path.
	MkdirAll bool

	// A single top-level folder can be implicitly
	// created by the Unarchive method if the files
	// to be extracted from the archive do not all
	// have a common root. This roughly mimics the
	// behavior of archival tools integrated into OS
	// file browsers which create a subfolder to
	// avoid unexpectedly littering the destination
	// folder with potentially many files, causing a
	// problematic cleanup/organization situation.
	// This feature is available for both creation
	// and extraction of archives, but may be slightly
	// inefficient with lots and lots of files,
	// especially on extraction.
	ImplicitTopLevelFolder bool

	// Strip number of leading paths. This feature is available
	// only during unpacking of the entire archive.
	StripComponents int

	// If true, errors encountered during reading
	// or writing a single file will be logged and
	// the operation will continue on remaining files.
	ContinueOnError bool

	// The password to open archives (optional).
	Password string

	// Exactly one of rr/rc is set while an archive is open;
	// Close resets both.
	rr *rardecode.Reader     // underlying stream reader
	rc *rardecode.ReadCloser // supports multi-volume archives (files only)
}
// CheckExt ensures the file extension matches the format.
func (*Rar) CheckExt(filename string) error {
	if strings.HasSuffix(filename, ".rar") {
		return nil
	}
	return fmt.Errorf("filename must have a .rar extension")
}
// CheckPath ensures that the filename has not been crafted to perform path traversal attacks
func (*Rar) CheckPath(to, filename string) error {
	// resolve the destination folder so the prefix check below cannot be
	// bypassed when no destination folder is supplied in input
	to, _ = filepath.Abs(to)
	dest := filepath.Join(to, filename)
	// prevent path traversal attacks; append the separator to the prefix
	// so that a sibling such as "/tmp/foo-evil" is not accepted as being
	// inside "/tmp/foo" (a bare HasPrefix check allows that escape)
	if dest != to && !strings.HasPrefix(dest, to+string(os.PathSeparator)) {
		return &IllegalPathError{AbsolutePath: dest, Filename: filename}
	}
	return nil
}
// Unarchive unpacks the .rar file at source to destination.
// Destination will be treated as a folder name. It supports
// multi-volume archives.
func (r *Rar) Unarchive(source, destination string) error {
	if r.MkdirAll && !fileExists(destination) {
		if err := mkdir(destination, 0755); err != nil {
			return fmt.Errorf("preparing destination: %v", err)
		}
	}

	// if the files in the archive do not all share a common
	// root, then make sure we extract to a single subfolder
	// rather than potentially littering the destination...
	if r.ImplicitTopLevelFolder {
		var err error
		if destination, err = r.addTopLevelFolder(source, destination); err != nil {
			return fmt.Errorf("scanning source archive: %v", err)
		}
	}

	if err := r.OpenFile(source); err != nil {
		return fmt.Errorf("opening rar archive for reading: %v", err)
	}
	defer r.Close()

	for {
		err := r.unrarNext(destination)
		if err == io.EOF {
			// end of archive reached
			return nil
		}
		if err == nil {
			continue
		}
		// illegal paths are always skipped; other errors are
		// skipped only when ContinueOnError is set
		if r.ContinueOnError || IsIllegalPathError(err) {
			log.Printf("[ERROR] Reading file in rar archive: %v", err)
			continue
		}
		return fmt.Errorf("reading file in rar archive: %v", err)
	}
}
// addTopLevelFolder scans the files contained inside
// the archive named sourceArchive and returns a modified
// destination if all the files do not share the same
// top-level folder.
func (r *Rar) addTopLevelFolder(sourceArchive, destination string) (string, error) {
	file, err := os.Open(sourceArchive)
	if err != nil {
		return "", fmt.Errorf("opening source archive: %v", err)
	}
	defer file.Close()

	rc, err := rardecode.NewReader(file, r.Password)
	if err != nil {
		return "", fmt.Errorf("creating archive reader: %v", err)
	}

	// collect every entry name so we can tell whether they
	// share a single top-level folder
	var files []string
	for {
		hdr, err := rc.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			// message fixed: this is a rar archive, not a tarball
			return "", fmt.Errorf("scanning archive's file listing: %v", err)
		}
		files = append(files, hdr.Name)
	}

	if multipleTopLevels(files) {
		destination = filepath.Join(destination, folderNameFromFileName(sourceArchive))
	}

	return destination, nil
}
// unrarNext reads the next entry from the already-open archive and
// writes it beneath the destination folder `to`. io.EOF from Read is
// passed through unchanged so the calling loop can terminate on it.
func (r *Rar) unrarNext(to string) error {
	f, err := r.Read()
	if err != nil {
		return err // don't wrap error; calling loop must break on io.EOF
	}
	defer f.Close()
	header, ok := f.Header.(*rardecode.FileHeader)
	if !ok {
		return fmt.Errorf("expected header to be *rardecode.FileHeader but was %T", f.Header)
	}
	// reject entries whose names would escape the destination folder
	errPath := r.CheckPath(to, header.Name)
	if errPath != nil {
		return fmt.Errorf("checking path traversal attempt: %v", errPath)
	}

	if r.StripComponents > 0 {
		if strings.Count(header.Name, "/") < r.StripComponents {
			return nil // skip path with fewer components
		}

		// drop the first StripComponents path segments from the entry
		// name (mutates header.Name before it is joined below)
		for i := 0; i < r.StripComponents; i++ {
			slash := strings.Index(header.Name, "/")
			header.Name = header.Name[slash+1:]
		}
	}
	return r.unrarFile(f, filepath.Join(to, header.Name))
}
// unrarFile writes a single archive entry f to the path `to`,
// creating parent directories as needed. Symlink entries are skipped.
func (r *Rar) unrarFile(f File, to string) error {
	// do not overwrite existing files, if configured
	if !f.IsDir() && !r.OverwriteExisting && fileExists(to) {
		return fmt.Errorf("file already exists: %s", to)
	}

	hdr, ok := f.Header.(*rardecode.FileHeader)
	if !ok {
		return fmt.Errorf("expected header to be *rardecode.FileHeader but was %T", f.Header)
	}

	if f.IsDir() {
		// fix: check the destination path itself, not the literal
		// "testdata" folder (apparent leftover debug/test condition);
		// chmod when the directory already exists, otherwise create it
		if fileExists(to) {
			err := os.Chmod(to, hdr.Mode())
			if err != nil {
				return fmt.Errorf("changing dir mode: %v", err)
			}
		} else {
			err := mkdir(to, hdr.Mode())
			if err != nil {
				return fmt.Errorf("making directories: %v", err)
			}
		}
		return nil
	}

	// if files come before their containing folders, then we must
	// create their folders before writing the file
	err := mkdir(filepath.Dir(to), 0755)
	if err != nil {
		return fmt.Errorf("making parent directories: %v", err)
	}

	if (hdr.Mode() & os.ModeSymlink) != 0 {
		return nil
	}

	return writeNewFile(to, r.rr, hdr.Mode())
}
// OpenFile opens filename for reading. This method supports
// multi-volume archives, whereas Open does not (but Open
// supports any stream, not just files).
func (r *Rar) OpenFile(filename string) error {
	if r.rr != nil {
		return fmt.Errorf("rar archive is already open for reading")
	}
	rc, err := rardecode.OpenReader(filename, r.Password)
	if err != nil {
		return err
	}
	// keep the ReadCloser for Close, and expose its embedded
	// Reader through the common rr field
	r.rc = rc
	r.rr = &rc.Reader
	return nil
}
// Open opens t for reading an archive from
// in. The size parameter is not used.
func (r *Rar) Open(in io.Reader, size int64) error {
	if r.rr != nil {
		return fmt.Errorf("rar archive is already open for reading")
	}
	rr, err := rardecode.NewReader(in, r.Password)
	if err != nil {
		return err
	}
	r.rr = rr
	return nil
}
// Read reads the next file from t, which must have
// already been opened for reading. If there are no
// more files, the error is io.EOF. The File must
// be closed when finished reading from it.
func (r *Rar) Read() (File, error) {
	if r.rr == nil {
		return File{}, fmt.Errorf("rar archive is not open")
	}

	hdr, err := r.rr.Next()
	if err != nil {
		// do not wrap: callers rely on seeing io.EOF unchanged
		return File{}, err
	}

	return File{
		FileInfo:   rarFileInfo{hdr},
		Header:     hdr,
		ReadCloser: ReadFakeCloser{r.rr},
	}, nil
}
// Close closes the rar archive(s) opened by Create and Open.
func (r *Rar) Close() error {
	var err error
	// the ReadCloser only exists for file-backed archives
	if rc := r.rc; rc != nil {
		r.rc = nil
		err = rc.Close()
	}
	// always drop the stream reader so the instance can be reused
	r.rr = nil
	return err
}
// Walk calls walkFn for each visited item in archive.
func (r *Rar) Walk(archive string, walkFn WalkFunc) error {
	file, err := os.Open(archive)
	if err != nil {
		return fmt.Errorf("opening archive file: %v", err)
	}
	defer file.Close()

	if err = r.Open(file, 0); err != nil {
		return fmt.Errorf("opening archive: %v", err)
	}
	defer r.Close()

	for {
		f, readErr := r.Read()
		if readErr == io.EOF {
			return nil
		}
		if readErr != nil {
			if !r.ContinueOnError {
				return fmt.Errorf("opening next file: %v", readErr)
			}
			log.Printf("[ERROR] Opening next file: %v", readErr)
			continue
		}

		walkErr := walkFn(f)
		switch {
		case walkErr == nil:
			// keep walking
		case walkErr == ErrStopWalk:
			// the callback requested an early, successful stop
			return nil
		case r.ContinueOnError:
			log.Printf("[ERROR] Walking %s: %v", f.Name(), walkErr)
		default:
			return fmt.Errorf("walking %s: %v", f.Name(), walkErr)
		}
	}
}
// Extract extracts a single file from the rar archive.
// If the target is a directory, the entire folder will
// be extracted into destination.
func (r *Rar) Extract(source, target, destination string) error {
	// target refers to a path inside the archive, which should be clean also
	target = path.Clean(target)

	// if the target ends up being a directory, then
	// we will continue walking and extracting files
	// until we are no longer within that directory
	// (targetDirPath is walk-local state: empty until/unless the
	// target is discovered to be a directory entry)
	var targetDirPath string

	return r.Walk(source, func(f File) error {
		th, ok := f.Header.(*rardecode.FileHeader)
		if !ok {
			return fmt.Errorf("expected header to be *rardecode.FileHeader but was %T", f.Header)
		}

		// importantly, cleaning the path strips tailing slash,
		// which must be appended to folders within the archive
		name := path.Clean(th.Name)
		if f.IsDir() && target == name {
			targetDirPath = path.Dir(name)
		}

		if within(target, th.Name) {
			// either this is the exact file we want, or is
			// in the directory we want to extract

			// build the filename we will extract to
			// (relative to the target's parent so directory
			// structure below the target is preserved)
			end, err := filepath.Rel(targetDirPath, th.Name)
			if err != nil {
				return fmt.Errorf("relativizing paths: %v", err)
			}
			joined := filepath.Join(destination, end)

			err = r.unrarFile(f, joined)
			if err != nil {
				return fmt.Errorf("extracting file %s: %v", th.Name, err)
			}

			// if our target was not a directory, stop walk
			if targetDirPath == "" {
				return ErrStopWalk
			}
		} else if targetDirPath != "" {
			// finished walking the entire directory
			// (entries for a folder are contiguous, so leaving the
			// subtree means there is nothing further to extract)
			return ErrStopWalk
		}

		return nil
	})
}
// Match returns true if the format of file matches this
// type's format. It should not affect reader position.
func (*Rar) Match(file io.ReadSeeker) (bool, error) {
	currentPos, err := file.Seek(0, io.SeekCurrent)
	if err != nil {
		return false, err
	}
	if _, err = file.Seek(0, io.SeekStart); err != nil {
		return false, err
	}
	defer func() {
		// restore the caller's position regardless of outcome
		_, _ = file.Seek(currentPos, io.SeekStart)
	}()

	// io.ReadFull retries short reads: a plain Read may legally
	// return fewer than 8 bytes with a nil error, which previously
	// caused false negatives on valid archives.
	buf := make([]byte, 8)
	if _, err := io.ReadFull(file, buf); err != nil {
		// too short (or unreadable) to be a rar archive
		return false, nil
	}

	hasRarHeader := bytes.Equal(buf[:7], []byte("Rar!\x1a\x07\x00")) || // ver 1.5
		bytes.Equal(buf, []byte("Rar!\x1a\x07\x01\x00")) // ver 5.0

	return hasRarHeader, nil
}
// String returns the name of this format: "rar".
func (r *Rar) String() string { return "rar" }
// NewRar returns a new, default instance ready to be customized and used.
func NewRar() *Rar {
	r := new(Rar)
	r.MkdirAll = true
	return r
}
// rarFileInfo adapts a *rardecode.FileHeader to the os.FileInfo interface.
type rarFileInfo struct {
	fh *rardecode.FileHeader
}

func (rfi rarFileInfo) Name() string       { return rfi.fh.Name }
func (rfi rarFileInfo) Size() int64        { return rfi.fh.UnPackedSize }
func (rfi rarFileInfo) Mode() os.FileMode  { return rfi.fh.Mode() }
func (rfi rarFileInfo) ModTime() time.Time { return rfi.fh.ModificationTime }
func (rfi rarFileInfo) IsDir() bool        { return rfi.fh.IsDir }

// Sys returns nil; no underlying data source is exposed.
func (rfi rarFileInfo) Sys() interface{} { return nil }
// Compile-time checks to ensure type implements desired interfaces.
// (These assignments fail to compile if the types stop satisfying them.)
var (
	_ = Reader(new(Rar))
	_ = Unarchiver(new(Rar))
	_ = Walker(new(Rar))
	_ = Extractor(new(Rar))
	_ = Matcher(new(Rar))
	_ = ExtensionChecker(new(Rar))
	_ = FilenameChecker(new(Rar))
	_ = os.FileInfo(rarFileInfo{})
)

// DefaultRar is a default instance that is conveniently ready to use.
var DefaultRar = NewRar()

51
sz.go Normal file
View file

@ -0,0 +1,51 @@
package archiver
import (
"fmt"
"io"
"path/filepath"
"github.com/golang/snappy"
)
// Snappy facilitates Snappy compression. It has no
// configuration; the zero value is ready to use.
type Snappy struct{}
// Compress reads in, compresses it, and writes it to out.
func (s *Snappy) Compress(in io.Reader, out io.Writer) error {
	w := snappy.NewBufferedWriter(out)
	if _, err := io.Copy(w, in); err != nil {
		w.Close()
		return err
	}
	// Close flushes the buffered writer; its error must be returned
	// or the output may be silently truncated (the previous deferred
	// Close discarded it).
	return w.Close()
}
// Decompress reads in, decompresses it, and writes it to out.
func (s *Snappy) Decompress(in io.Reader, out io.Writer) error {
	_, err := io.Copy(out, snappy.NewReader(in))
	return err
}
// CheckExt ensures the file extension matches the format.
func (s *Snappy) CheckExt(filename string) error {
	if filepath.Ext(filename) == ".sz" {
		return nil
	}
	return fmt.Errorf("filename must have a .sz extension")
}
// String returns the name of this format: "sz".
func (s *Snappy) String() string { return "sz" }
// NewSnappy returns a new, default instance ready to be customized and used.
func NewSnappy() *Snappy {
	return &Snappy{}
}
// Compile-time checks to ensure type implements desired interfaces.
// (These assignments fail to compile if *Snappy stops satisfying them.)
var (
	_ = Compressor(new(Snappy))
	_ = Decompressor(new(Snappy))
)

// DefaultSnappy is a default instance that is conveniently ready to use.
var DefaultSnappy = NewSnappy()

659
tar.go Normal file
View file

@ -0,0 +1,659 @@
package archiver
import (
"archive/tar"
"bytes"
"fmt"
"io"
"log"
"os"
"path"
"path/filepath"
"strconv"
"strings"
)
// Tar provides facilities for operating TAR archives.
// See http://www.gnu.org/software/tar/manual/html_node/Standard.html.
type Tar struct {
	// Whether to overwrite existing files; if false,
	// an error is returned if the file exists.
	OverwriteExisting bool

	// Whether to make all the directories necessary
	// to create a tar archive in the desired path.
	MkdirAll bool

	// A single top-level folder can be implicitly
	// created by the Archive or Unarchive methods
	// if the files to be added to the archive
	// or the files to be extracted from the archive
	// do not all have a common root. This roughly
	// mimics the behavior of archival tools integrated
	// into OS file browsers which create a subfolder
	// to avoid unexpectedly littering the destination
	// folder with potentially many files, causing a
	// problematic cleanup/organization situation.
	// This feature is available for both creation
	// and extraction of archives, but may be slightly
	// inefficient with lots and lots of files,
	// especially on extraction.
	ImplicitTopLevelFolder bool

	// Strip number of leading paths. This feature is available
	// only during unpacking of the entire archive.
	StripComponents int

	// If true, errors encountered during reading
	// or writing a single file will be logged and
	// the operation will continue on remaining files.
	ContinueOnError bool

	// Current writer/reader while an archive is open.
	tw *tar.Writer
	tr *tar.Reader

	// NOTE(review): these hooks appear to let wrapping formats
	// (tar.gz, tar.bz2, ...) layer compression around the raw tar
	// stream — they are not set within this file; confirm at callers.
	readerWrapFn  func(io.Reader) (io.Reader, error)
	writerWrapFn  func(io.Writer) (io.Writer, error)
	cleanupWrapFn func()
}
// CheckExt ensures the file extension matches the format.
func (*Tar) CheckExt(filename string) error {
	if strings.HasSuffix(filename, ".tar") {
		return nil
	}
	return fmt.Errorf("filename must have a .tar extension")
}
// CheckPath ensures that the filename has not been crafted to perform
// path traversal attacks (e.g. a name containing "../" sequences that
// would resolve outside the destination folder).
func (*Tar) CheckPath(to, filename string) error {
	// Make the destination absolute so the prefix check below cannot be
	// bypassed when no destination folder is supplied in input.
	to, _ = filepath.Abs(to)
	dest := filepath.Join(to, filename)
	// Require the destination prefix to be followed by a path separator:
	// a bare strings.HasPrefix(dest, to) would wrongly accept sibling
	// paths, e.g. dest "/tmp/foobar" for destination "/tmp/foo".
	if dest != to && !strings.HasPrefix(dest, to+string(os.PathSeparator)) {
		return &IllegalPathError{AbsolutePath: dest, Filename: filename}
	}
	return nil
}
// Archive creates a tarball file at destination containing
// the files listed in sources. The destination must end with
// ".tar". File paths can be those of regular files or
// directories; directories will be recursively added.
func (t *Tar) Archive(sources []string, destination string) error {
	err := t.CheckExt(destination)
	// a non-nil writerWrapFn means a compressing wrapper (e.g. TarGz)
	// is driving this call, so the ".tar" extension requirement is waived
	if t.writerWrapFn == nil && err != nil {
		return fmt.Errorf("checking extension: %v", err)
	}
	if !t.OverwriteExisting && fileExists(destination) {
		return fmt.Errorf("file already exists: %s", destination)
	}
	// make the folder to contain the resulting archive
	// if it does not already exist
	destDir := filepath.Dir(destination)
	if t.MkdirAll && !fileExists(destDir) {
		err := mkdir(destDir, 0755)
		if err != nil {
			return fmt.Errorf("making folder for destination: %v", err)
		}
	}
	out, err := os.Create(destination)
	if err != nil {
		return fmt.Errorf("creating %s: %v", destination, err)
	}
	defer out.Close()
	err = t.Create(out)
	if err != nil {
		return fmt.Errorf("creating tar: %v", err)
	}
	// Close (deferred after out.Close is registered) flushes the
	// tar writer before the underlying file is closed
	defer t.Close()
	// only group sources under a synthetic top-level folder when
	// they do not already share a common root
	var topLevelFolder string
	if t.ImplicitTopLevelFolder && multipleTopLevels(sources) {
		topLevelFolder = folderNameFromFileName(destination)
	}
	for _, source := range sources {
		err := t.writeWalk(source, topLevelFolder, destination)
		if err != nil {
			return fmt.Errorf("walking %s: %v", source, err)
		}
	}
	return nil
}
// Unarchive unpacks the .tar file at source to destination.
// Destination will be treated as a folder name.
func (t *Tar) Unarchive(source, destination string) error {
	if !fileExists(destination) && t.MkdirAll {
		err := mkdir(destination, 0755)
		if err != nil {
			return fmt.Errorf("preparing destination: %v", err)
		}
	}
	// if the files in the archive do not all share a common
	// root, then make sure we extract to a single subfolder
	// rather than potentially littering the destination...
	if t.ImplicitTopLevelFolder {
		var err error
		destination, err = t.addTopLevelFolder(source, destination)
		if err != nil {
			return fmt.Errorf("scanning source archive: %v", err)
		}
	}
	file, err := os.Open(source)
	if err != nil {
		return fmt.Errorf("opening source archive: %v", err)
	}
	defer file.Close()
	err = t.Open(file, 0)
	if err != nil {
		return fmt.Errorf("opening tar archive for reading: %v", err)
	}
	defer t.Close()
	// extract entries one at a time until EOF; entries with illegal
	// (traversing) paths are logged and skipped rather than aborting
	// the whole archive
	for {
		err := t.untarNext(destination)
		if err == io.EOF {
			break
		}
		if err != nil {
			if t.ContinueOnError || IsIllegalPathError(err) {
				log.Printf("[ERROR] Reading file in tar archive: %v", err)
				continue
			}
			return fmt.Errorf("reading file in tar archive: %v", err)
		}
	}
	return nil
}
// addTopLevelFolder scans the files contained inside
// the tarball named sourceArchive and returns a modified
// destination if all the files do not share the same
// top-level folder.
func (t *Tar) addTopLevelFolder(sourceArchive, destination string) (string, error) {
	file, err := os.Open(sourceArchive)
	if err != nil {
		return "", fmt.Errorf("opening source archive: %v", err)
	}
	defer file.Close()
	// if the reader is to be wrapped, ensure we do that now
	// or we will not be able to read the archive successfully
	reader := io.Reader(file)
	if t.readerWrapFn != nil {
		reader, err = t.readerWrapFn(reader)
		if err != nil {
			return "", fmt.Errorf("wrapping reader: %v", err)
		}
	}
	if t.cleanupWrapFn != nil {
		defer t.cleanupWrapFn()
	}
	// pre-scan the entire listing; this extra pass over the archive
	// is why ImplicitTopLevelFolder is documented as potentially
	// inefficient on extraction
	tr := tar.NewReader(reader)
	var files []string
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return "", fmt.Errorf("scanning tarball's file listing: %v", err)
		}
		files = append(files, hdr.Name)
	}
	if multipleTopLevels(files) {
		destination = filepath.Join(destination, folderNameFromFileName(sourceArchive))
	}
	return destination, nil
}
// untarNext reads the next entry from the open archive and writes
// it beneath destination, honoring StripComponents and rejecting
// names that would escape the destination folder.
func (t *Tar) untarNext(destination string) error {
	f, err := t.Read()
	if err != nil {
		return err // don't wrap error; calling loop must break on io.EOF
	}
	defer f.Close()
	header, ok := f.Header.(*tar.Header)
	if !ok {
		return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header)
	}
	// reject entry names crafted to traverse outside destination
	errPath := t.CheckPath(destination, header.Name)
	if errPath != nil {
		return fmt.Errorf("checking path traversal attempt: %v", errPath)
	}
	if t.StripComponents > 0 {
		if strings.Count(header.Name, "/") < t.StripComponents {
			return nil // skip path with fewer components
		}
		// drop the first StripComponents path segments from the name
		for i := 0; i < t.StripComponents; i++ {
			slash := strings.Index(header.Name, "/")
			header.Name = header.Name[slash+1:]
		}
	}
	return t.untarFile(f, destination, header)
}
// untarFile writes a single archive entry f (with header hdr)
// to disk under destination, dispatching on the entry's type flag.
func (t *Tar) untarFile(f File, destination string, hdr *tar.Header) error {
	to := filepath.Join(destination, hdr.Name)
	// do not overwrite existing files, if configured
	if !f.IsDir() && !t.OverwriteExisting && fileExists(to) {
		return fmt.Errorf("file already exists: %s", to)
	}
	switch hdr.Typeflag {
	case tar.TypeDir:
		return mkdir(to, f.Mode())
	case tar.TypeReg, tar.TypeRegA, tar.TypeChar, tar.TypeBlock, tar.TypeFifo, tar.TypeGNUSparse:
		return writeNewFile(to, f, f.Mode())
	case tar.TypeSymlink:
		return writeNewSymbolicLink(to, hdr.Linkname)
	case tar.TypeLink:
		// hard link targets are resolved relative to the destination folder
		return writeNewHardLink(to, filepath.Join(destination, hdr.Linkname))
	case tar.TypeXGlobalHeader:
		return nil // ignore the pax global header from git-generated tarballs
	default:
		return fmt.Errorf("%s: unknown type flag: %c", hdr.Name, hdr.Typeflag)
	}
}
// writeWalk recursively adds source (a file or directory) to the open
// archive, prefixing entry names with topLevelFolder when non-empty,
// and skipping anything located inside destination so the output
// archive is never archived into itself.
func (t *Tar) writeWalk(source, topLevelFolder, destination string) error {
	sourceInfo, err := os.Stat(source)
	if err != nil {
		return fmt.Errorf("%s: stat: %v", source, err)
	}
	destAbs, err := filepath.Abs(destination)
	if err != nil {
		return fmt.Errorf("%s: getting absolute path of destination %s: %v", source, destination, err)
	}
	return filepath.Walk(source, func(fpath string, info os.FileInfo, err error) error {
		// handleErr either logs-and-continues or propagates,
		// depending on the ContinueOnError setting
		handleErr := func(err error) error {
			if t.ContinueOnError {
				log.Printf("[ERROR] Walking %s: %v", fpath, err)
				return nil
			}
			return err
		}
		if err != nil {
			return handleErr(fmt.Errorf("traversing %s: %v", fpath, err))
		}
		if info == nil {
			return handleErr(fmt.Errorf("no file info"))
		}
		// make sure we do not copy our output file into itself
		fpathAbs, err := filepath.Abs(fpath)
		if err != nil {
			return handleErr(fmt.Errorf("%s: getting absolute path: %v", fpath, err))
		}
		if within(fpathAbs, destAbs) {
			return nil
		}
		// build the name to be used within the archive
		nameInArchive, err := makeNameInArchive(sourceInfo, source, topLevelFolder, fpath)
		if err != nil {
			return handleErr(err)
		}
		// only regular files have contents to stream; directories,
		// symlinks, etc. are represented by their header alone
		var file io.ReadCloser
		if info.Mode().IsRegular() {
			file, err = os.Open(fpath)
			if err != nil {
				return handleErr(fmt.Errorf("%s: opening: %v", fpath, err))
			}
			// closed when this walk callback returns
			defer file.Close()
		}
		err = t.Write(File{
			FileInfo: FileInfo{
				FileInfo:   info,
				CustomName: nameInArchive,
				SourcePath: fpath,
			},
			ReadCloser: file,
		})
		if err != nil {
			return handleErr(fmt.Errorf("%s: writing: %s", fpath, err))
		}
		return nil
	})
}
// Create opens t for writing a tar archive to out.
func (t *Tar) Create(out io.Writer) error {
	if t.tw != nil {
		return fmt.Errorf("tar archive is already created for writing")
	}
	// Wrapping the writer lets compressed variants (gzip, bzip2, ...)
	// layer a compressor under the tar stream transparently.
	if t.writerWrapFn == nil {
		t.tw = tar.NewWriter(out)
		return nil
	}
	wrapped, err := t.writerWrapFn(out)
	if err != nil {
		return fmt.Errorf("wrapping writer: %v", err)
	}
	t.tw = tar.NewWriter(wrapped)
	return nil
}
// Write writes f to t, which must have been opened for writing first.
func (t *Tar) Write(f File) error {
	if t.tw == nil {
		return fmt.Errorf("tar archive was not created for writing first")
	}
	if f.FileInfo == nil {
		return fmt.Errorf("no file info")
	}
	if f.FileInfo.Name() == "" {
		return fmt.Errorf("missing file name")
	}
	// symlinks need their target resolved via the source path so the
	// header can record it
	var linkTarget string
	if isSymlink(f) {
		fi, ok := f.FileInfo.(FileInfo)
		if !ok {
			return fmt.Errorf("failed to cast fs.FileInfo to archiver.FileInfo: %v", f)
		}
		var err error
		linkTarget, err = os.Readlink(fi.SourcePath)
		if err != nil {
			return fmt.Errorf("%s: readlink: %v", fi.SourcePath, err)
		}
	}
	// ToSlash keeps link targets portable in the archive on Windows
	hdr, err := tar.FileInfoHeader(f, filepath.ToSlash(linkTarget))
	if err != nil {
		return fmt.Errorf("%s: making header: %v", f.Name(), err)
	}
	err = t.tw.WriteHeader(hdr)
	if err != nil {
		return fmt.Errorf("%s: writing header: %w", hdr.Name, err)
	}
	if f.IsDir() {
		return nil // directories have no contents
	}
	// only regular files carry a data payload
	if hdr.Typeflag == tar.TypeReg {
		if f.ReadCloser == nil {
			return fmt.Errorf("%s: no way to read file contents", f.Name())
		}
		_, err := io.Copy(t.tw, f)
		if err != nil {
			return fmt.Errorf("%s: copying contents: %w", f.Name(), err)
		}
	}
	return nil
}
// Open opens t for reading an archive from
// in. The size parameter is not used.
func (t *Tar) Open(in io.Reader, size int64) error {
	if t.tr != nil {
		return fmt.Errorf("tar archive is already open for reading")
	}
	// a reader wrapper, when present, lets us read compressed tarballs
	if t.readerWrapFn != nil {
		wrapped, err := t.readerWrapFn(in)
		if err != nil {
			return fmt.Errorf("wrapping file reader: %v", err)
		}
		in = wrapped
	}
	t.tr = tar.NewReader(in)
	return nil
}
// Read reads the next file from t, which must have
// already been opened for reading. If there are no
// more files, the error is io.EOF. The File must
// be closed when finished reading from it.
func (t *Tar) Read() (File, error) {
	if t.tr == nil {
		return File{}, fmt.Errorf("tar archive is not open")
	}
	hdr, err := t.tr.Next()
	if err != nil {
		// deliberately unwrapped: callers must observe io.EOF unchanged
		return File{}, err
	}
	return File{
		FileInfo:   hdr.FileInfo(),
		Header:     hdr,
		ReadCloser: ReadFakeCloser{t.tr},
	}, nil
}
// Close closes the tar archive(s) opened by Create and Open.
func (t *Tar) Close() error {
	var err error
	if t.tr != nil {
		t.tr = nil // readers need no explicit close; just drop the state
	}
	if t.tw != nil {
		// clear the field first so the instance is reusable
		// even if Close fails
		tw := t.tw
		t.tw = nil
		err = tw.Close()
	}
	// make sure cleanup of "Reader/Writer wrapper"
	// (say that ten times fast) happens AFTER the
	// underlying stream is closed
	if t.cleanupWrapFn != nil {
		t.cleanupWrapFn()
	}
	return err
}
// Walk calls walkFn for each visited item in archive.
func (t *Tar) Walk(archive string, walkFn WalkFunc) error {
	file, err := os.Open(archive)
	if err != nil {
		return fmt.Errorf("opening archive file: %v", err)
	}
	defer file.Close()
	err = t.Open(file, 0)
	if err != nil {
		return fmt.Errorf("opening archive: %v", err)
	}
	defer t.Close()
	for {
		f, err := t.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			if t.ContinueOnError {
				log.Printf("[ERROR] Opening next file: %v", err)
				continue
			}
			return fmt.Errorf("opening next file: %v", err)
		}
		err = walkFn(f)
		if err != nil {
			// ErrStopWalk is walkFn's way to end the walk early
			// without reporting an error
			if err == ErrStopWalk {
				break
			}
			if t.ContinueOnError {
				log.Printf("[ERROR] Walking %s: %v", f.Name(), err)
				continue
			}
			return fmt.Errorf("walking %s: %v", f.Name(), err)
		}
	}
	return nil
}
// Extract extracts a single file from the tar archive.
// If the target is a directory, the entire folder will
// be extracted into destination.
func (t *Tar) Extract(source, target, destination string) error {
	// target refers to a path inside the archive, which should be clean also
	target = path.Clean(target)
	// if the target ends up being a directory, then
	// we will continue walking and extracting files
	// until we are no longer within that directory
	var targetDirPath string
	return t.Walk(source, func(f File) error {
		th, ok := f.Header.(*tar.Header)
		if !ok {
			return fmt.Errorf("expected header to be *tar.Header but was %T", f.Header)
		}
		// importantly, cleaning the path strips tailing slash,
		// which must be appended to folders within the archive
		name := path.Clean(th.Name)
		if f.IsDir() && target == name {
			// remember the parent so later entries can be
			// relativized against it
			targetDirPath = path.Dir(name)
		}
		if within(target, th.Name) {
			// either this is the exact file we want, or is
			// in the directory we want to extract
			// build the filename we will extract to
			end, err := filepath.Rel(targetDirPath, th.Name)
			if err != nil {
				return fmt.Errorf("relativizing paths: %v", err)
			}
			th.Name = end
			// relativize any hardlink names
			if th.Typeflag == tar.TypeLink {
				th.Linkname = filepath.Join(filepath.Base(filepath.Dir(th.Linkname)), filepath.Base(th.Linkname))
			}
			err = t.untarFile(f, destination, th)
			if err != nil {
				return fmt.Errorf("extracting file %s: %v", th.Name, err)
			}
			// if our target was not a directory, stop walk
			if targetDirPath == "" {
				return ErrStopWalk
			}
		} else if targetDirPath != "" {
			// finished walking the entire directory
			return ErrStopWalk
		}
		return nil
	})
}
// Match returns true if the format of file matches this
// type's format. It should not affect reader position.
func (*Tar) Match(file io.ReadSeeker) (bool, error) {
	// remember the current position so it can be restored before returning
	currentPos, err := file.Seek(0, io.SeekCurrent)
	if err != nil {
		return false, err
	}
	// use the named io.SeekStart constant rather than a bare 0,
	// matching the deferred restore below
	_, err = file.Seek(0, io.SeekStart)
	if err != nil {
		return false, err
	}
	defer func() {
		_, _ = file.Seek(currentPos, io.SeekStart)
	}()
	// a valid tar file must begin with a full 512-byte header block
	buf := make([]byte, tarBlockSize)
	if _, err = io.ReadFull(file, buf); err != nil {
		return false, nil // too short to be a tar; not an error for matching
	}
	return hasTarHeader(buf), nil
}
// hasTarHeader checks passed bytes has a valid tar header or not. buf must
// contain at least 512 bytes and if not, it always returns false.
func hasTarHeader(buf []byte) bool {
	if len(buf) < tarBlockSize {
		return false
	}
	// the header checksum is stored as an octal string in bytes 148-156
	b := buf[148:156]
	b = bytes.Trim(b, " \x00") // clean up all spaces and null bytes
	if len(b) == 0 {
		return false // unknown format
	}
	hdrSum, err := strconv.ParseUint(string(b), 8, 64)
	if err != nil {
		return false
	}
	// According to the go official archive/tar, Sun tar uses signed byte
	// values so this calcs both signed and unsigned
	var usum uint64
	var sum int64
	for i, c := range buf {
		if 148 <= i && i < 156 {
			c = ' ' // checksum field itself is counted as blanks
		}
		usum += uint64(uint8(c))
		sum += int64(int8(c))
	}
	// accept the header if either interpretation matches
	if hdrSum != usum && int64(hdrSum) != sum {
		return false // invalid checksum
	}
	return true
}
func (t *Tar) String() string { return "tar" }
// NewTar returns a new, default instance ready to be customized and used.
func NewTar() *Tar {
	t := new(Tar)
	t.MkdirAll = true
	return t
}
const tarBlockSize = 512
// Compile-time checks to ensure type implements desired interfaces.
var (
_ = Reader(new(Tar))
_ = Writer(new(Tar))
_ = Archiver(new(Tar))
_ = Unarchiver(new(Tar))
_ = Walker(new(Tar))
_ = Extractor(new(Tar))
_ = Matcher(new(Tar))
_ = ExtensionChecker(new(Tar))
_ = FilenameChecker(new(Tar))
)
// DefaultTar is a default instance that is conveniently ready to use.
var DefaultTar = NewTar()

67
tar_test.go Normal file
View file

@ -0,0 +1,67 @@
package archiver_test
import (
"io/ioutil"
"os"
"path"
"testing"
"github.com/mholt/archiver/v3"
)
func requireRegularFile(t *testing.T, path string) os.FileInfo {
fileInfo, err := os.Stat(path)
if err != nil {
t.Fatalf("fileInfo on '%s': %v", path, err)
}
if !fileInfo.Mode().IsRegular() {
t.Fatalf("'%s' expected to be a regular file", path)
}
return fileInfo
}
func assertSameFile(t *testing.T, f1, f2 os.FileInfo) {
if !os.SameFile(f1, f2) {
t.Errorf("expected '%s' and '%s' to be the same file", f1.Name(), f2.Name())
}
}
// TestDefaultTar_Unarchive_HardlinkSuccess verifies that unarchiving a
// GNU tarball containing hard links materializes both names as the same
// underlying file on disk.
func TestDefaultTar_Unarchive_HardlinkSuccess(t *testing.T) {
	source := "testdata/gnu-hardlinks.tar"
	destination, err := ioutil.TempDir("", "archiver_tar_test")
	if err != nil {
		t.Fatalf("creating temp dir: %v", err)
	}
	defer os.RemoveAll(destination)
	err = archiver.DefaultTar.Unarchive(source, destination)
	if err != nil {
		t.Fatalf("unarchiving '%s' to '%s': %v", source, destination, err)
	}
	fileaInfo := requireRegularFile(t, path.Join(destination, "dir-1", "dir-2", "file-a"))
	filebInfo := requireRegularFile(t, path.Join(destination, "dir-1", "dir-2", "file-b"))
	assertSameFile(t, fileaInfo, filebInfo)
}
// TestDefaultTar_Extract_HardlinkSuccess verifies that extracting a single
// directory from a GNU tarball preserves hard-link identity between the
// extracted files.
func TestDefaultTar_Extract_HardlinkSuccess(t *testing.T) {
	source := "testdata/gnu-hardlinks.tar"
	destination, err := ioutil.TempDir("", "archiver_tar_test")
	if err != nil {
		t.Fatalf("creating temp dir: %v", err)
	}
	defer os.RemoveAll(destination)
	err = archiver.DefaultTar.Extract(source, path.Join("dir-1", "dir-2"), destination)
	if err != nil {
		t.Fatalf("unarchiving '%s' to '%s': %v", source, destination, err)
	}
	fileaInfo := requireRegularFile(t, path.Join(destination, "dir-2", "file-a"))
	filebInfo := requireRegularFile(t, path.Join(destination, "dir-2", "file-b"))
	assertSameFile(t, fileaInfo, filebInfo)
}

114
tarbrotli.go Normal file
View file

@ -0,0 +1,114 @@
package archiver
import (
"fmt"
"io"
"strings"
"github.com/andybalholm/brotli"
)
// TarBrotli facilitates brotli compression of tarball archives.
type TarBrotli struct {
*Tar
Quality int
}
// CheckExt ensures the file extension matches the format.
func (*TarBrotli) CheckExt(filename string) error {
	for _, ext := range []string{".tar.br", ".tbr"} {
		if strings.HasSuffix(filename, ext) {
			return nil
		}
	}
	return fmt.Errorf("filename must have a .tar.br or .tbr extension")
}
// Archive creates a compressed tar file at destination
// containing the files listed in sources. The destination
// must end with ".tar.br" or ".tbr". File paths can be
// those of regular files or directories; directories will
// be recursively added.
func (tbr *TarBrotli) Archive(sources []string, destination string) error {
err := tbr.CheckExt(destination)
if err != nil {
return fmt.Errorf("output %s", err.Error())
}
tbr.wrapWriter()
return tbr.Tar.Archive(sources, destination)
}
// Unarchive unpacks the compressed tarball at
// source to destination. Destination will be
// treated as a folder name.
func (tbr *TarBrotli) Unarchive(source, destination string) error {
tbr.wrapReader()
return tbr.Tar.Unarchive(source, destination)
}
// Walk calls walkFn for each visited item in archive.
func (tbr *TarBrotli) Walk(archive string, walkFn WalkFunc) error {
tbr.wrapReader()
return tbr.Tar.Walk(archive, walkFn)
}
// Create opens txz for writing a compressed
// tar archive to out.
func (tbr *TarBrotli) Create(out io.Writer) error {
tbr.wrapWriter()
return tbr.Tar.Create(out)
}
// Open opens t for reading a compressed archive from
// in. The size parameter is not used.
func (tbr *TarBrotli) Open(in io.Reader, size int64) error {
tbr.wrapReader()
return tbr.Tar.Open(in, size)
}
// Extract extracts a single file from the tar archive.
// If the target is a directory, the entire folder will
// be extracted into destination.
func (tbr *TarBrotli) Extract(source, target, destination string) error {
tbr.wrapReader()
return tbr.Tar.Extract(source, target, destination)
}
// wrapWriter installs hooks so the tar stream is written through a
// brotli compressor at the configured Quality; the captured writer is
// closed by the cleanup hook after the tar stream itself is closed.
func (tbr *TarBrotli) wrapWriter() {
	var brw *brotli.Writer
	tbr.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) {
		brw = brotli.NewWriterLevel(w, tbr.Quality)
		return brw, nil
	}
	tbr.Tar.cleanupWrapFn = func() {
		brw.Close()
	}
}
func (tbr *TarBrotli) wrapReader() {
tbr.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) {
return brotli.NewReader(r), nil
}
}
func (tbr *TarBrotli) String() string { return "tar.br" }
// NewTarBrotli returns a new, default instance ready to be customized and used.
func NewTarBrotli() *TarBrotli {
return &TarBrotli{
Tar: NewTar(),
Quality: brotli.DefaultCompression,
}
}
// Compile-time checks to ensure type implements desired interfaces.
var (
_ = Reader(new(TarBrotli))
_ = Writer(new(TarBrotli))
_ = Archiver(new(TarBrotli))
_ = Unarchiver(new(TarBrotli))
_ = Walker(new(TarBrotli))
_ = Extractor(new(TarBrotli))
)
// DefaultTarBrotli is a convenient archiver ready to use.
var DefaultTarBrotli = NewTarBrotli()

126
tarbz2.go Normal file
View file

@ -0,0 +1,126 @@
package archiver
import (
"fmt"
"io"
"strings"
"github.com/dsnet/compress/bzip2"
)
// TarBz2 facilitates bzip2 compression
// (https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf)
// of tarball archives.
type TarBz2 struct {
*Tar
CompressionLevel int
}
// CheckExt ensures the file extension matches the format.
func (*TarBz2) CheckExt(filename string) error {
if !strings.HasSuffix(filename, ".tar.bz2") &&
!strings.HasSuffix(filename, ".tbz2") {
return fmt.Errorf("filename must have a .tar.bz2 or .tbz2 extension")
}
return nil
}
// Archive creates a compressed tar file at destination
// containing the files listed in sources. The destination
// must end with ".tar.bz2" or ".tbz2". File paths can be
// those of regular files or directories; directories will
// be recursively added.
func (tbz2 *TarBz2) Archive(sources []string, destination string) error {
err := tbz2.CheckExt(destination)
if err != nil {
return fmt.Errorf("output %s", err.Error())
}
tbz2.wrapWriter()
return tbz2.Tar.Archive(sources, destination)
}
// Unarchive unpacks the compressed tarball at
// source to destination. Destination will be
// treated as a folder name.
func (tbz2 *TarBz2) Unarchive(source, destination string) error {
tbz2.wrapReader()
return tbz2.Tar.Unarchive(source, destination)
}
// Walk calls walkFn for each visited item in archive.
func (tbz2 *TarBz2) Walk(archive string, walkFn WalkFunc) error {
tbz2.wrapReader()
return tbz2.Tar.Walk(archive, walkFn)
}
// Create opens tbz2 for writing a compressed
// tar archive to out.
func (tbz2 *TarBz2) Create(out io.Writer) error {
tbz2.wrapWriter()
return tbz2.Tar.Create(out)
}
// Open opens t for reading a compressed archive from
// in. The size parameter is not used.
func (tbz2 *TarBz2) Open(in io.Reader, size int64) error {
tbz2.wrapReader()
return tbz2.Tar.Open(in, size)
}
// Extract extracts a single file from the tar archive.
// If the target is a directory, the entire folder will
// be extracted into destination.
func (tbz2 *TarBz2) Extract(source, target, destination string) error {
tbz2.wrapReader()
return tbz2.Tar.Extract(source, target, destination)
}
// wrapWriter installs hooks so the tar stream is written through a
// bzip2 compressor at the configured CompressionLevel; the captured
// writer is closed by the cleanup hook after the tar stream is closed.
func (tbz2 *TarBz2) wrapWriter() {
	var bz2w *bzip2.Writer
	tbz2.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) {
		var err error
		bz2w, err = bzip2.NewWriter(w, &bzip2.WriterConfig{
			Level: tbz2.CompressionLevel,
		})
		return bz2w, err
	}
	tbz2.Tar.cleanupWrapFn = func() {
		bz2w.Close()
	}
}
func (tbz2 *TarBz2) wrapReader() {
var bz2r *bzip2.Reader
tbz2.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) {
var err error
bz2r, err = bzip2.NewReader(r, nil)
return bz2r, err
}
tbz2.Tar.cleanupWrapFn = func() {
bz2r.Close()
}
}
func (tbz2 *TarBz2) String() string { return "tar.bz2" }
// NewTarBz2 returns a new, default instance ready to be customized and used.
func NewTarBz2() *TarBz2 {
return &TarBz2{
CompressionLevel: bzip2.DefaultCompression,
Tar: NewTar(),
}
}
// Compile-time checks to ensure type implements desired interfaces.
var (
_ = Reader(new(TarBz2))
_ = Writer(new(TarBz2))
_ = Archiver(new(TarBz2))
_ = Unarchiver(new(TarBz2))
_ = Walker(new(TarBz2))
_ = Extractor(new(TarBz2))
)
// DefaultTarBz2 is a convenient archiver ready to use.
var DefaultTarBz2 = NewTarBz2()

137
targz.go Normal file
View file

@ -0,0 +1,137 @@
package archiver
import (
"fmt"
"io"
"strings"
"github.com/klauspost/compress/gzip"
"github.com/klauspost/pgzip"
)
// TarGz facilitates gzip compression
// (RFC 1952) of tarball archives.
type TarGz struct {
*Tar
// The compression level to use, as described
// in the compress/gzip package.
CompressionLevel int
// Disables parallel gzip.
SingleThreaded bool
}
// CheckExt ensures the file extension matches the format.
func (*TarGz) CheckExt(filename string) error {
if !strings.HasSuffix(filename, ".tar.gz") &&
!strings.HasSuffix(filename, ".tgz") {
return fmt.Errorf("filename must have a .tar.gz or .tgz extension")
}
return nil
}
// Archive creates a compressed tar file at destination
// containing the files listed in sources. The destination
// must end with ".tar.gz" or ".tgz". File paths can be
// those of regular files or directories; directories will
// be recursively added.
func (tgz *TarGz) Archive(sources []string, destination string) error {
err := tgz.CheckExt(destination)
if err != nil {
return fmt.Errorf("output %s", err.Error())
}
tgz.wrapWriter()
return tgz.Tar.Archive(sources, destination)
}
// Unarchive unpacks the compressed tarball at
// source to destination. Destination will be
// treated as a folder name.
func (tgz *TarGz) Unarchive(source, destination string) error {
tgz.wrapReader()
return tgz.Tar.Unarchive(source, destination)
}
// Walk calls walkFn for each visited item in archive.
func (tgz *TarGz) Walk(archive string, walkFn WalkFunc) error {
tgz.wrapReader()
return tgz.Tar.Walk(archive, walkFn)
}
// Create opens txz for writing a compressed
// tar archive to out.
func (tgz *TarGz) Create(out io.Writer) error {
tgz.wrapWriter()
return tgz.Tar.Create(out)
}
// Open opens t for reading a compressed archive from
// in. The size parameter is not used.
func (tgz *TarGz) Open(in io.Reader, size int64) error {
tgz.wrapReader()
return tgz.Tar.Open(in, size)
}
// Extract extracts a single file from the tar archive.
// If the target is a directory, the entire folder will
// be extracted into destination.
func (tgz *TarGz) Extract(source, target, destination string) error {
tgz.wrapReader()
return tgz.Tar.Extract(source, target, destination)
}
// wrapWriter installs hooks so the tar stream is written through a
// gzip compressor; parallel pgzip is used unless SingleThreaded is
// set. The captured writer is closed by the cleanup hook after the
// tar stream itself is closed.
func (tgz *TarGz) wrapWriter() {
	var gzw io.WriteCloser
	tgz.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) {
		var err error
		if tgz.SingleThreaded {
			gzw, err = gzip.NewWriterLevel(w, tgz.CompressionLevel)
		} else {
			gzw, err = pgzip.NewWriterLevel(w, tgz.CompressionLevel)
		}
		return gzw, err
	}
	tgz.Tar.cleanupWrapFn = func() {
		gzw.Close()
	}
}
// wrapReader installs hooks so the tar stream is read through a gzip
// decompressor; parallel pgzip is used unless SingleThreaded is set.
// The captured reader is closed by the cleanup hook after reading ends.
func (tgz *TarGz) wrapReader() {
	var gzr io.ReadCloser
	tgz.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) {
		var err error
		if tgz.SingleThreaded {
			gzr, err = gzip.NewReader(r)
		} else {
			gzr, err = pgzip.NewReader(r)
		}
		return gzr, err
	}
	tgz.Tar.cleanupWrapFn = func() {
		gzr.Close()
	}
}
func (tgz *TarGz) String() string { return "tar.gz" }
// NewTarGz returns a new, default instance ready to be customized and used.
func NewTarGz() *TarGz {
return &TarGz{
CompressionLevel: gzip.DefaultCompression,
Tar: NewTar(),
}
}
// Compile-time checks to ensure type implements desired interfaces.
var (
_ = Reader(new(TarGz))
_ = Writer(new(TarGz))
_ = Archiver(new(TarGz))
_ = Unarchiver(new(TarGz))
_ = Walker(new(TarGz))
_ = Extractor(new(TarGz))
)
// DefaultTarGz is a convenient archiver ready to use.
var DefaultTarGz = NewTarGz()

129
tarlz4.go Normal file
View file

@ -0,0 +1,129 @@
package archiver
import (
"fmt"
"io"
"strings"
"github.com/pierrec/lz4/v4"
)
// TarLz4 facilitates lz4 compression
// (https://github.com/lz4/lz4/tree/master/doc)
// of tarball archives.
type TarLz4 struct {
*Tar
// The compression level to use when writing.
// Minimum 0 (fast compression), maximum 12
// (most space savings).
CompressionLevel int
}
// CheckExt ensures the file extension matches the format.
func (*TarLz4) CheckExt(filename string) error {
if !strings.HasSuffix(filename, ".tar.lz4") &&
!strings.HasSuffix(filename, ".tlz4") {
return fmt.Errorf("filename must have a .tar.lz4 or .tlz4 extension")
}
return nil
}
// Archive creates a compressed tar file at destination
// containing the files listed in sources. The destination
// must end with ".tar.lz4" or ".tlz4". File paths can be
// those of regular files or directories; directories will
// be recursively added.
func (tlz4 *TarLz4) Archive(sources []string, destination string) error {
err := tlz4.CheckExt(destination)
if err != nil {
return fmt.Errorf("output %s", err.Error())
}
tlz4.wrapWriter()
return tlz4.Tar.Archive(sources, destination)
}
// Unarchive unpacks the compressed tarball at
// source to destination. Destination will be
// treated as a folder name.
func (tlz4 *TarLz4) Unarchive(source, destination string) error {
tlz4.wrapReader()
return tlz4.Tar.Unarchive(source, destination)
}
// Walk calls walkFn for each visited item in archive.
func (tlz4 *TarLz4) Walk(archive string, walkFn WalkFunc) error {
tlz4.wrapReader()
return tlz4.Tar.Walk(archive, walkFn)
}
// Create opens tlz4 for writing a compressed
// tar archive to out.
func (tlz4 *TarLz4) Create(out io.Writer) error {
tlz4.wrapWriter()
return tlz4.Tar.Create(out)
}
// Open opens t for reading a compressed archive from
// in. The size parameter is not used.
func (tlz4 *TarLz4) Open(in io.Reader, size int64) error {
tlz4.wrapReader()
return tlz4.Tar.Open(in, size)
}
// Extract extracts a single file from the tar archive.
// If the target is a directory, the entire folder will
// be extracted into destination.
func (tlz4 *TarLz4) Extract(source, target, destination string) error {
tlz4.wrapReader()
return tlz4.Tar.Extract(source, target, destination)
}
// wrapWriter installs hooks so the tar stream is written through an
// lz4 compressor; the level is translated by bit-shifting for
// compatibility with lz4/v3 (see TODO below). The captured writer is
// closed by the cleanup hook after the tar stream itself is closed.
func (tlz4 *TarLz4) wrapWriter() {
	var lz4w *lz4.Writer
	tlz4.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) {
		lz4w = lz4.NewWriter(w)
		// TODO archiver v4: use proper lz4.Fast
		// bitshifting for backwards compatibility with lz4/v3
		options := []lz4.Option{
			lz4.CompressionLevelOption(lz4.CompressionLevel(1 << (8 + tlz4.CompressionLevel))),
		}
		if err := lz4w.Apply(options...); err != nil {
			return lz4w, err
		}
		return lz4w, nil
	}
	tlz4.Tar.cleanupWrapFn = func() {
		lz4w.Close()
	}
}
func (tlz4 *TarLz4) wrapReader() {
tlz4.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) {
return lz4.NewReader(r), nil
}
}
func (tlz4 *TarLz4) String() string { return "tar.lz4" }
// NewTarLz4 returns a new, default instance ready to be customized and used.
func NewTarLz4() *TarLz4 {
return &TarLz4{
CompressionLevel: 9, // https://github.com/lz4/lz4/blob/1b819bfd633ae285df2dfe1b0589e1ec064f2873/lib/lz4hc.h#L48
Tar: NewTar(),
}
}
// Compile-time checks to ensure type implements desired interfaces.
var (
_ = Reader(new(TarLz4))
_ = Writer(new(TarLz4))
_ = Archiver(new(TarLz4))
_ = Unarchiver(new(TarLz4))
_ = Walker(new(TarLz4))
_ = Extractor(new(TarLz4))
)
// DefaultTarLz4 is a convenient archiver ready to use.
var DefaultTarLz4 = NewTarLz4()

114
tarsz.go Normal file
View file

@ -0,0 +1,114 @@
package archiver
import (
"fmt"
"io"
"strings"
"github.com/golang/snappy"
)
// TarSz facilitates Snappy compression
// (https://github.com/google/snappy)
// of tarball archives.
type TarSz struct {
*Tar
}
// CheckExt ensures the file extension matches the format.
func (*TarSz) CheckExt(filename string) error {
if !strings.HasSuffix(filename, ".tar.sz") &&
!strings.HasSuffix(filename, ".tsz") {
return fmt.Errorf("filename must have a .tar.sz or .tsz extension")
}
return nil
}
// Archive creates a compressed tar file at destination
// containing the files listed in sources. The destination
// must end with ".tar.sz" or ".tsz". File paths can be
// those of regular files or directories; directories will
// be recursively added.
func (tsz *TarSz) Archive(sources []string, destination string) error {
err := tsz.CheckExt(destination)
if err != nil {
return fmt.Errorf("output %s", err.Error())
}
tsz.wrapWriter()
return tsz.Tar.Archive(sources, destination)
}
// Unarchive unpacks the compressed tarball at
// source to destination. Destination will be
// treated as a folder name.
func (tsz *TarSz) Unarchive(source, destination string) error {
	tsz.wrapReader()
	return tsz.Tar.Unarchive(source, destination)
}

// Walk calls walkFn for each visited item in archive.
func (tsz *TarSz) Walk(archive string, walkFn WalkFunc) error {
	tsz.wrapReader()
	return tsz.Tar.Walk(archive, walkFn)
}

// Create opens tsz for writing a compressed
// tar archive to out.
func (tsz *TarSz) Create(out io.Writer) error {
	tsz.wrapWriter()
	return tsz.Tar.Create(out)
}

// Open opens tsz for reading a compressed archive from
// in. The size parameter is not used.
func (tsz *TarSz) Open(in io.Reader, size int64) error {
	tsz.wrapReader()
	return tsz.Tar.Open(in, size)
}

// Extract extracts a single file from the tar archive.
// If the target is a directory, the entire folder will
// be extracted into destination.
func (tsz *TarSz) Extract(source, target, destination string) error {
	tsz.wrapReader()
	return tsz.Tar.Extract(source, target, destination)
}

// wrapWriter installs a Snappy compression layer around the tar
// writer. The *snappy.Writer is captured so cleanupWrapFn can
// close (and thereby flush) it when the archive is finalized.
func (tsz *TarSz) wrapWriter() {
	var sw *snappy.Writer
	tsz.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) {
		sw = snappy.NewBufferedWriter(w)
		return sw, nil
	}
	tsz.Tar.cleanupWrapFn = func() {
		sw.Close() // NOTE(review): Close error is discarded — a flush failure goes unreported
	}
}

// wrapReader installs a Snappy decompression layer around the tar reader.
func (tsz *TarSz) wrapReader() {
	tsz.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) {
		return snappy.NewReader(r), nil
	}
}

// String returns the format name used for registration/lookup.
func (tsz *TarSz) String() string { return "tar.sz" }

// NewTarSz returns a new, default instance ready to be customized and used.
func NewTarSz() *TarSz {
	return &TarSz{
		Tar: NewTar(),
	}
}

// Compile-time checks to ensure type implements desired interfaces.
var (
	_ = Reader(new(TarSz))
	_ = Writer(new(TarSz))
	_ = Archiver(new(TarSz))
	_ = Unarchiver(new(TarSz))
	_ = Walker(new(TarSz))
	_ = Extractor(new(TarSz))
)

// DefaultTarSz is a convenient archiver ready to use.
var DefaultTarSz = NewTarSz()

119
tarxz.go Normal file
View file

@ -0,0 +1,119 @@
package archiver
import (
"fmt"
"io"
"strings"
"github.com/ulikunitz/xz"
fastxz "github.com/xi2/xz"
)
// TarXz facilitates xz compression
// (https://tukaani.org/xz/format.html)
// of tarball archives.
type TarXz struct {
	*Tar // embedded Tar supplies the actual archive read/write machinery
}

// CheckExt ensures the file extension matches the format.
func (*TarXz) CheckExt(filename string) error {
	if !strings.HasSuffix(filename, ".tar.xz") &&
		!strings.HasSuffix(filename, ".txz") {
		return fmt.Errorf("filename must have a .tar.xz or .txz extension")
	}
	return nil
}

// Archive creates a compressed tar file at destination
// containing the files listed in sources. The destination
// must end with ".tar.xz" or ".txz". File paths can be
// those of regular files or directories; directories will
// be recursively added.
func (txz *TarXz) Archive(sources []string, destination string) error {
	err := txz.CheckExt(destination)
	if err != nil {
		return fmt.Errorf("output %s", err.Error())
	}
	// Install the xz writer wrapper before delegating to Tar.
	txz.wrapWriter()
	return txz.Tar.Archive(sources, destination)
}
// Unarchive unpacks the compressed tarball at
// source to destination. Destination will be
// treated as a folder name.
func (txz *TarXz) Unarchive(source, destination string) error {
	txz.wrapReader()
	return txz.Tar.Unarchive(source, destination)
}

// Walk calls walkFn for each visited item in archive.
func (txz *TarXz) Walk(archive string, walkFn WalkFunc) error {
	txz.wrapReader()
	return txz.Tar.Walk(archive, walkFn)
}

// Create opens txz for writing a compressed
// tar archive to out.
func (txz *TarXz) Create(out io.Writer) error {
	txz.wrapWriter()
	return txz.Tar.Create(out)
}

// Open opens txz for reading a compressed archive from
// in. The size parameter is not used.
func (txz *TarXz) Open(in io.Reader, size int64) error {
	txz.wrapReader()
	return txz.Tar.Open(in, size)
}

// Extract extracts a single file from the tar archive.
// If the target is a directory, the entire folder will
// be extracted into destination.
func (txz *TarXz) Extract(source, target, destination string) error {
	txz.wrapReader()
	return txz.Tar.Extract(source, target, destination)
}

// wrapWriter installs an xz compression layer (ulikunitz/xz) around
// the tar writer. The *xz.Writer is captured so cleanupWrapFn can
// close (and thereby flush) it when the archive is finalized.
func (txz *TarXz) wrapWriter() {
	var xzw *xz.Writer
	txz.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) {
		var err error
		xzw, err = xz.NewWriter(w)
		return xzw, err
	}
	txz.Tar.cleanupWrapFn = func() {
		xzw.Close() // NOTE(review): Close error is discarded — a flush failure goes unreported
	}
}

// wrapReader installs an xz decompression layer around the tar reader.
// Decompression uses xi2/xz (faster reader); the 0 argument is the
// package's default dictionary-size limit — see that package's docs.
func (txz *TarXz) wrapReader() {
	var xzr *fastxz.Reader
	txz.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) {
		var err error
		xzr, err = fastxz.NewReader(r, 0)
		return xzr, err
	}
}

// String returns the format name used for registration/lookup.
func (txz *TarXz) String() string { return "tar.xz" }

// NewTarXz returns a new, default instance ready to be customized and used.
func NewTarXz() *TarXz {
	return &TarXz{
		Tar: NewTar(),
	}
}

// Compile-time checks to ensure type implements desired interfaces.
var (
	_ = Reader(new(TarXz))
	_ = Writer(new(TarXz))
	_ = Archiver(new(TarXz))
	_ = Unarchiver(new(TarXz))
	_ = Walker(new(TarXz))
	_ = Extractor(new(TarXz))
)

// DefaultTarXz is a convenient archiver ready to use.
var DefaultTarXz = NewTarXz()

120
tarzst.go Normal file
View file

@ -0,0 +1,120 @@
package archiver
import (
"fmt"
"io"
"strings"
"github.com/klauspost/compress/zstd"
)
// TarZstd facilitates Zstandard compression
// (RFC 8478) of tarball archives.
type TarZstd struct {
	*Tar // embedded Tar supplies the actual archive read/write machinery
}
// CheckExt ensures the file extension matches the format.
// Both the long form ".tar.zst" and the short form ".tzst" are
// accepted, matching Archive's documented contract and the
// sibling formats (.tar.sz/.tsz, .tar.xz/.txz).
func (*TarZstd) CheckExt(filename string) error {
	if !strings.HasSuffix(filename, ".tar.zst") &&
		!strings.HasSuffix(filename, ".tzst") {
		return fmt.Errorf("filename must have a .tar.zst or .tzst extension")
	}
	return nil
}
// Archive creates a compressed tar file at destination
// containing the files listed in sources. The destination
// must end with ".tar.zst" or ".tzst". File paths can be
// those of regular files or directories; directories will
// be recursively added.
func (tzst *TarZstd) Archive(sources []string, destination string) error {
	err := tzst.CheckExt(destination)
	if err != nil {
		return fmt.Errorf("output %s", err.Error())
	}
	// Install the zstd writer wrapper before delegating to Tar.
	tzst.wrapWriter()
	return tzst.Tar.Archive(sources, destination)
}

// Unarchive unpacks the compressed tarball at
// source to destination. Destination will be
// treated as a folder name.
func (tzst *TarZstd) Unarchive(source, destination string) error {
	tzst.wrapReader()
	return tzst.Tar.Unarchive(source, destination)
}

// Walk calls walkFn for each visited item in archive.
func (tzst *TarZstd) Walk(archive string, walkFn WalkFunc) error {
	tzst.wrapReader()
	return tzst.Tar.Walk(archive, walkFn)
}

// Create opens tzst for writing a compressed
// tar archive to out.
func (tzst *TarZstd) Create(out io.Writer) error {
	tzst.wrapWriter()
	return tzst.Tar.Create(out)
}

// Open opens tzst for reading a compressed archive from
// in. The size parameter is not used.
func (tzst *TarZstd) Open(in io.Reader, size int64) error {
	tzst.wrapReader()
	return tzst.Tar.Open(in, size)
}

// Extract extracts a single file from the tar archive.
// If the target is a directory, the entire folder will
// be extracted into destination.
func (tzst *TarZstd) Extract(source, target, destination string) error {
	tzst.wrapReader()
	return tzst.Tar.Extract(source, target, destination)
}

// wrapWriter installs a Zstandard compression layer around the tar
// writer. The *zstd.Encoder is captured so cleanupWrapFn can close
// (and thereby flush) it when the archive is finalized.
func (tzst *TarZstd) wrapWriter() {
	var zstdw *zstd.Encoder
	tzst.Tar.writerWrapFn = func(w io.Writer) (io.Writer, error) {
		var err error
		zstdw, err = zstd.NewWriter(w)
		return zstdw, err
	}
	tzst.Tar.cleanupWrapFn = func() {
		zstdw.Close() // NOTE(review): Close error is discarded — a flush failure goes unreported
	}
}

// wrapReader installs a Zstandard decompression layer around the tar
// reader; cleanupWrapFn releases the decoder's resources afterwards.
func (tzst *TarZstd) wrapReader() {
	var zstdr *zstd.Decoder
	tzst.Tar.readerWrapFn = func(r io.Reader) (io.Reader, error) {
		var err error
		zstdr, err = zstd.NewReader(r)
		return zstdr, err
	}
	tzst.Tar.cleanupWrapFn = func() {
		zstdr.Close()
	}
}

// String returns the format name used for registration/lookup.
func (tzst *TarZstd) String() string { return "tar.zst" }

// NewTarZstd returns a new, default instance ready to be customized and used.
func NewTarZstd() *TarZstd {
	return &TarZstd{
		Tar: NewTar(),
	}
}

// Compile-time checks to ensure type implements desired interfaces.
var (
	_ = Reader(new(TarZstd))
	_ = Writer(new(TarZstd))
	_ = Archiver(new(TarZstd))
	_ = Unarchiver(new(TarZstd))
	_ = Walker(new(TarZstd))
	_ = ExtensionChecker(new(TarZstd))
	_ = Extractor(new(TarZstd))
)

// DefaultTarZstd is a convenient archiver ready to use.
var DefaultTarZstd = NewTarZstd()

BIN
testdata/corpus/already-compressed.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.7 KiB

View file

@ -0,0 +1,2 @@
"interface{} says nothing."
- Rob Pike

2
testdata/corpus/proverbs/proverb1.txt vendored Normal file
View file

@ -0,0 +1,2 @@
"Channels orchestrate; mutexes serialize."
- Rob Pike

2
testdata/corpus/proverbs/proverb2.txt vendored Normal file
View file

@ -0,0 +1,2 @@
"A little copying is better than a little dependency."
- Rob Pike

2
testdata/corpus/quote1.txt vendored Normal file
View file

@ -0,0 +1,2 @@
"Go has generics; they're called interfaces."
- Matt Holt

74
testdata/create-evil-tar.go vendored Normal file
View file

@ -0,0 +1,74 @@
package main
import (
"archive/tar"
"fmt"
"log"
"os"
"time"
)
// main writes double-evil.tar, a fixture used to test defenses against
// path-traversal ("Zip Slip"-style) attacks: it contains a symlink whose
// target escapes the extraction directory, followed by a regular file at
// the same path, so naive extractors write through the symlink.
// Do not "fix" the evil entries — they are the point of the fixture.
func main() {
	// Create a file to write our archive to.
	tarname := "double-evil.tar"
	fw, err := os.Create(tarname)
	if nil != err {
		log.Fatal(err)
		return
	}
	// Create a new tar archive.
	tw := tar.NewWriter(fw)
	// Write the evil symlink, it points outside of the target directory
	hdr := &tar.Header{
		Name:     "bad/file.txt",
		Mode:     0644,
		Typeflag: tar.TypeSymlink,
		Linkname: "../../badfile.txt",
		ModTime:  time.Now(),
	}
	if err := tw.WriteHeader(hdr); err != nil {
		log.Fatal(err)
		return
	}
	// Write safe files to the archive.
	// Note "bad/file.txt" appears again as a REGULAR file: extracting it
	// after the symlink above is what makes the archive "double" evil.
	var files = []struct {
		Name, Body string
	}{
		{"goodfile.txt", "hello world"},
		{"morefile.txt", "hello world"},
		{"bad/file.txt", "Mwa-ha-ha"},
	}
	for _, file := range files {
		hdr := &tar.Header{
			Name:    file.Name,
			Mode:    0644,
			Size:    int64(len(file.Body)),
			ModTime: time.Now(),
		}
		if err := tw.WriteHeader(hdr); err != nil {
			log.Fatal(err)
			return
		}
		if _, err := tw.Write([]byte(file.Body)); err != nil {
			log.Fatal(err)
		}
	}
	// Close the in-memory archive so that it writes trailing data
	err = tw.Close()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("Wrote %s\n", tarname)
	// close the on-disk archive so that it flushes all bytes
	if err = fw.Close(); err != nil {
		log.Fatal(err)
		return
	}
}

75
testdata/create-evil-zip.go vendored Normal file
View file

@ -0,0 +1,75 @@
package main
import (
"log"
"os"
"time"
"github.com/klauspost/compress/zip"
)
// main writes double-evil.zip, the ZIP counterpart of the evil tar
// fixture: a symlink entry whose body (the link target) escapes the
// extraction directory, followed by a regular file at the same path.
// Do not "fix" the evil entries — they are the point of the fixture.
func main() {
	// Create a buffer to write our archive to.
	fw, err := os.Create("double-evil.zip")
	if nil != err {
		log.Fatal(err)
		return
	}
	// Create a new zip archive.
	w := zip.NewWriter(fw)
	// Write the evil symlink
	h := &zip.FileHeader{
		Name:     "bad/file.txt",
		Method:   zip.Deflate,
		Modified: time.Now(),
	}
	h.SetMode(os.ModeSymlink)
	header, err := w.CreateHeader(h)
	if err != nil {
		log.Fatal(err)
	}
	// The evil symlink points outside of the target directory
	// (for symlink entries, the file body IS the link target).
	_, err = header.Write([]byte("../../badfile.txt"))
	if err != nil {
		log.Fatal(err)
	}
	// Write safe files to the archive.
	// Note "bad/file.txt" appears again as a REGULAR file: extracting it
	// after the symlink above is what makes the archive "double" evil.
	var files = []struct {
		Name, Body string
	}{
		{"goodfile.txt", "hello world"},
		{"morefile.txt", "hello world"},
		{"bad/file.txt", "Mwa-ha-ha"},
	}
	for _, file := range files {
		h := &zip.FileHeader{
			Name:     file.Name,
			Method:   zip.Deflate,
			Modified: time.Now(),
		}
		header, err := w.CreateHeader(h)
		if err != nil {
			log.Fatal(err)
		}
		_, err = header.Write([]byte(file.Body))
		if err != nil {
			log.Fatal(err)
		}
	}
	// close the in-memory archive so that it writes trailing data
	if err = w.Close(); err != nil {
		log.Fatal(err)
	}
	// close the on-disk archive so that it flushes all bytes
	if err = fw.Close(); err != nil {
		log.Fatal(err)
		return
	}
}

BIN
testdata/gnu-hardlinks.tar vendored Normal file

Binary file not shown.

BIN
testdata/sample.rar vendored Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

58
xz.go Normal file
View file

@ -0,0 +1,58 @@
package archiver
import (
"fmt"
"io"
"path/filepath"
"github.com/ulikunitz/xz"
fastxz "github.com/xi2/xz"
)
// Xz facilitates XZ compression. It is stateless; the zero value is usable.
type Xz struct{}
// Compress reads in, compresses it, and writes it to out.
// The xz writer's Close flushes buffered compressed data, so its
// error must be returned — deferring and discarding it (as before)
// could silently truncate the output.
func (x *Xz) Compress(in io.Reader, out io.Writer) error {
	w, err := xz.NewWriter(out)
	if err != nil {
		return err
	}
	if _, err = io.Copy(w, in); err != nil {
		// best-effort close; the copy error is the primary failure
		w.Close()
		return err
	}
	return w.Close()
}
// Decompress reads in, decompresses it, and writes it to out.
// Decoding uses xi2/xz (faster reader); the 0 argument is the
// package's default dictionary-size limit.
func (x *Xz) Decompress(in io.Reader, out io.Writer) error {
	r, err := fastxz.NewReader(in, 0)
	if err != nil {
		return err
	}
	_, err = io.Copy(out, r)
	return err
}

// CheckExt ensures the file extension matches the format.
func (x *Xz) CheckExt(filename string) error {
	if filepath.Ext(filename) != ".xz" {
		return fmt.Errorf("filename must have a .xz extension")
	}
	return nil
}

// String returns the format name used for registration/lookup.
func (x *Xz) String() string { return "xz" }

// NewXz returns a new, default instance ready to be customized and used.
func NewXz() *Xz {
	return new(Xz)
}

// Compile-time checks to ensure type implements desired interfaces.
var (
	_ = Compressor(new(Xz))
	_ = Decompressor(new(Xz))
)

// DefaultXz is a default instance that is conveniently ready to use.
var DefaultXz = NewXz()

711
zip.go Normal file
View file

@ -0,0 +1,711 @@
package archiver
import (
"bytes"
"compress/flate"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"path"
"path/filepath"
"strings"
"github.com/dsnet/compress/bzip2"
"github.com/klauspost/compress/zip"
"github.com/klauspost/compress/zstd"
"github.com/ulikunitz/xz"
)
// ZipCompressionMethod Compression type
type ZipCompressionMethod uint16

// Compression methods.
// see https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT.
// Note LZMA: Disabled - because 7z isn't able to unpack ZIP+LZMA ZIP+LZMA2 archives made this way - and vice versa.
// Values are the method IDs defined by the ZIP APPNOTE.
const (
	Store   ZipCompressionMethod = 0
	Deflate ZipCompressionMethod = 8
	BZIP2   ZipCompressionMethod = 12
	LZMA    ZipCompressionMethod = 14
	ZSTD    ZipCompressionMethod = 93
	XZ      ZipCompressionMethod = 95
)

// Zip provides facilities for operating ZIP archives.
// See https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT.
// The zero value is not ready to use; obtain instances via NewZip.
type Zip struct {
	// The compression level to use, as described
	// in the compress/flate package.
	CompressionLevel int

	// Whether to overwrite existing files; if false,
	// an error is returned if the file exists.
	OverwriteExisting bool

	// Whether to make all the directories necessary
	// to create a zip archive in the desired path.
	MkdirAll bool

	// If enabled, selective compression will only
	// compress files which are not already in a
	// compressed format; this is decided based
	// simply on file extension.
	SelectiveCompression bool

	// A single top-level folder can be implicitly
	// created by the Archive or Unarchive methods
	// if the files to be added to the archive
	// or the files to be extracted from the archive
	// do not all have a common root. This roughly
	// mimics the behavior of archival tools integrated
	// into OS file browsers which create a subfolder
	// to avoid unexpectedly littering the destination
	// folder with potentially many files, causing a
	// problematic cleanup/organization situation.
	// This feature is available for both creation
	// and extraction of archives, but may be slightly
	// inefficient with lots and lots of files,
	// especially on extraction.
	ImplicitTopLevelFolder bool

	// Strip number of leading paths. This feature is available
	// only during unpacking of the entire archive.
	StripComponents int

	// If true, errors encountered during reading
	// or writing a single file will be logged and
	// the operation will continue on remaining files.
	ContinueOnError bool

	// Compression algorithm
	FileMethod ZipCompressionMethod

	// Internal state: active writer (Create) and reader (Open)
	// plus the read cursor into zr.File.
	zw   *zip.Writer
	zr   *zip.Reader
	ridx int
	//decinitialized bool
}
// CheckExt ensures the file extension matches the format.
func (*Zip) CheckExt(filename string) error {
	if !strings.HasSuffix(filename, ".zip") {
		return fmt.Errorf("filename must have a .zip extension")
	}
	return nil
}

// Registering a global decompressor is not reentrant and may panic;
// registerDecompressor therefore registers the extra methods (zstd,
// bzip2, xz) on a per-reader basis instead.
// NOTE(review): each callback returns nil on construction error, which
// the zip package surfaces as a failure to open that entry.
func registerDecompressor(zr *zip.Reader) {
	// register zstd decompressor
	zr.RegisterDecompressor(uint16(ZSTD), func(r io.Reader) io.ReadCloser {
		zr, err := zstd.NewReader(r)
		if err != nil {
			return nil
		}
		return zr.IOReadCloser()
	})
	zr.RegisterDecompressor(uint16(BZIP2), func(r io.Reader) io.ReadCloser {
		bz2r, err := bzip2.NewReader(r, nil)
		if err != nil {
			return nil
		}
		return bz2r
	})
	zr.RegisterDecompressor(uint16(XZ), func(r io.Reader) io.ReadCloser {
		xr, err := xz.NewReader(r)
		if err != nil {
			return nil
		}
		return ioutil.NopCloser(xr)
	})
}
// CheckPath ensures that the joined destination path for filename
// stays inside the destination folder, guarding against path
// traversal ("Zip Slip") attacks.
func (*Zip) CheckPath(to, filename string) error {
	// Make the destination explicit so the containment check cannot be
	// bypassed when no destination folder is supplied in input.
	to, _ = filepath.Abs(to)
	dest := filepath.Join(to, filename)
	// Prevent path traversal attacks: dest must be the destination
	// itself or live strictly inside it. Comparing against the
	// separator-terminated prefix closes the sibling-prefix bypass
	// (e.g. "/tmp/foobar" would otherwise pass for to="/tmp/foo").
	if dest != to && !strings.HasPrefix(dest, to+string(os.PathSeparator)) {
		return &IllegalPathError{AbsolutePath: dest, Filename: filename}
	}
	return nil
}
// Archive creates a .zip file at destination containing
// the files listed in sources. The destination must end
// with ".zip". File paths can be those of regular files
// or directories. Regular files are stored at the 'root'
// of the archive, and directories are recursively added.
func (z *Zip) Archive(sources []string, destination string) error {
	err := z.CheckExt(destination)
	if err != nil {
		return fmt.Errorf("checking extension: %v", err)
	}
	if !z.OverwriteExisting && fileExists(destination) {
		return fmt.Errorf("file already exists: %s", destination)
	}
	// make the folder to contain the resulting archive
	// if it does not already exist
	destDir := filepath.Dir(destination)
	if z.MkdirAll && !fileExists(destDir) {
		err := mkdir(destDir, 0755)
		if err != nil {
			return fmt.Errorf("making folder for destination: %v", err)
		}
	}
	out, err := os.Create(destination)
	if err != nil {
		return fmt.Errorf("creating %s: %v", destination, err)
	}
	defer out.Close()
	err = z.Create(out)
	if err != nil {
		return fmt.Errorf("creating zip: %v", err)
	}
	// deferred so the zip trailer is written before out is closed
	defer z.Close()
	var topLevelFolder string
	if z.ImplicitTopLevelFolder && multipleTopLevels(sources) {
		topLevelFolder = folderNameFromFileName(destination)
	}
	for _, source := range sources {
		err := z.writeWalk(source, topLevelFolder, destination)
		if err != nil {
			return fmt.Errorf("walking %s: %v", source, err)
		}
	}
	return nil
}

// Unarchive unpacks the .zip file at source to destination.
// Destination will be treated as a folder name.
func (z *Zip) Unarchive(source, destination string) error {
	if !fileExists(destination) && z.MkdirAll {
		err := mkdir(destination, 0755)
		if err != nil {
			return fmt.Errorf("preparing destination: %v", err)
		}
	}
	file, err := os.Open(source)
	if err != nil {
		return fmt.Errorf("opening source file: %v", err)
	}
	defer file.Close()
	fileInfo, err := file.Stat()
	if err != nil {
		return fmt.Errorf("statting source file: %v", err)
	}
	// zip readers need the total size up front (central directory is at the end)
	err = z.Open(file, fileInfo.Size())
	if err != nil {
		return fmt.Errorf("opening zip archive for reading: %v", err)
	}
	defer z.Close()
	// if the files in the archive do not all share a common
	// root, then make sure we extract to a single subfolder
	// rather than potentially littering the destination...
	if z.ImplicitTopLevelFolder {
		files := make([]string, len(z.zr.File))
		for i := range z.zr.File {
			files[i] = z.zr.File[i].Name
		}
		if multipleTopLevels(files) {
			destination = filepath.Join(destination, folderNameFromFileName(source))
		}
	}
	for {
		err := z.extractNext(destination)
		if err == io.EOF {
			break
		}
		if err != nil {
			// illegal paths are always skipped (logged), never fatal
			if z.ContinueOnError || IsIllegalPathError(err) {
				log.Printf("[ERROR] Reading file in zip archive: %v", err)
				continue
			}
			return fmt.Errorf("reading file in zip archive: %v", err)
		}
	}
	return nil
}
// extractNext reads the next entry from the open archive and extracts
// it under to, applying the CheckPath traversal guard and the
// StripComponents setting. Returns io.EOF when no entries remain.
func (z *Zip) extractNext(to string) error {
	f, err := z.Read()
	if err != nil {
		return err // don't wrap error; calling loop must break on io.EOF
	}
	defer f.Close()
	header, ok := f.Header.(zip.FileHeader)
	if !ok {
		return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header)
	}
	errPath := z.CheckPath(to, header.Name)
	if errPath != nil {
		return fmt.Errorf("checking path traversal attempt: %v", errPath)
	}
	if z.StripComponents > 0 {
		if strings.Count(header.Name, "/") < z.StripComponents {
			return nil // skip path with fewer components
		}
		// drop the first StripComponents path segments from the name
		for i := 0; i < z.StripComponents; i++ {
			slash := strings.Index(header.Name, "/")
			header.Name = header.Name[slash+1:]
		}
	}
	return z.extractFile(f, to, &header)
}

// extractFile writes a single archive entry (directory, symlink, or
// regular file) to disk at to joined with the header name.
func (z *Zip) extractFile(f File, to string, header *zip.FileHeader) error {
	to = filepath.Join(to, header.Name)
	// if a directory, no content; simply make the directory and return
	if f.IsDir() {
		return mkdir(to, f.Mode())
	}
	// do not overwrite existing files, if configured
	if !z.OverwriteExisting && fileExists(to) {
		return fmt.Errorf("file already exists: %s", to)
	}
	// extract symbolic links as symbolic links
	if isSymlink(header.FileInfo()) {
		// symlink target is the contents of the file
		buf := new(bytes.Buffer)
		_, err := io.Copy(buf, f)
		if err != nil {
			return fmt.Errorf("%s: reading symlink target: %v", header.Name, err)
		}
		return writeNewSymbolicLink(to, strings.TrimSpace(buf.String()))
	}
	return writeNewFile(to, f, f.Mode())
}
// writeWalk recursively adds source (a file or directory) to the open
// archive, naming entries relative to source and nested under
// topLevelFolder when set. Entries that resolve inside destination are
// skipped so the archive never tries to include itself.
func (z *Zip) writeWalk(source, topLevelFolder, destination string) error {
	sourceInfo, err := os.Stat(source)
	if err != nil {
		return fmt.Errorf("%s: stat: %v", source, err)
	}
	destAbs, err := filepath.Abs(destination)
	if err != nil {
		return fmt.Errorf("%s: getting absolute path of destination %s: %v", source, destination, err)
	}
	return filepath.Walk(source, func(fpath string, info os.FileInfo, err error) error {
		// handleErr either logs-and-continues or aborts the walk,
		// depending on ContinueOnError.
		handleErr := func(err error) error {
			if z.ContinueOnError {
				log.Printf("[ERROR] Walking %s: %v", fpath, err)
				return nil
			}
			return err
		}
		if err != nil {
			return handleErr(fmt.Errorf("traversing %s: %v", fpath, err))
		}
		if info == nil {
			return handleErr(fmt.Errorf("%s: no file info", fpath))
		}
		// make sure we do not copy the output file into the output
		// file; that results in an infinite loop and disk exhaustion!
		fpathAbs, err := filepath.Abs(fpath)
		if err != nil {
			return handleErr(fmt.Errorf("%s: getting absolute path: %v", fpath, err))
		}
		if within(fpathAbs, destAbs) {
			return nil
		}
		// build the name to be used within the archive
		nameInArchive, err := makeNameInArchive(sourceInfo, source, topLevelFolder, fpath)
		if err != nil {
			return handleErr(err)
		}
		var file io.ReadCloser
		if info.Mode().IsRegular() {
			file, err = os.Open(fpath)
			if err != nil {
				return handleErr(fmt.Errorf("%s: opening: %v", fpath, err))
			}
			// NOTE(review): deferred inside the walk callback, so each
			// file is closed when this callback returns.
			defer file.Close()
		}
		err = z.Write(File{
			FileInfo: FileInfo{
				FileInfo:   info,
				CustomName: nameInArchive,
				SourcePath: fpath,
			},
			ReadCloser: file,
		})
		if err != nil {
			return handleErr(fmt.Errorf("%s: writing: %s", fpath, err))
		}
		return nil
	})
}
// Create opens z for writing a ZIP archive to out.
// Only the compressor for the configured FileMethod (plus deflate,
// when a non-default level is set) is registered on the writer.
func (z *Zip) Create(out io.Writer) error {
	if z.zw != nil {
		return fmt.Errorf("zip archive is already created for writing")
	}
	z.zw = zip.NewWriter(out)
	if z.CompressionLevel != flate.DefaultCompression {
		z.zw.RegisterCompressor(zip.Deflate, func(out io.Writer) (io.WriteCloser, error) {
			return flate.NewWriter(out, z.CompressionLevel)
		})
	}
	switch z.FileMethod {
	case BZIP2:
		z.zw.RegisterCompressor(uint16(BZIP2), func(out io.Writer) (io.WriteCloser, error) {
			return bzip2.NewWriter(out, &bzip2.WriterConfig{Level: z.CompressionLevel})
		})
	case ZSTD:
		z.zw.RegisterCompressor(uint16(ZSTD), func(out io.Writer) (io.WriteCloser, error) {
			return zstd.NewWriter(out)
		})
	case XZ:
		z.zw.RegisterCompressor(uint16(XZ), func(out io.Writer) (io.WriteCloser, error) {
			return xz.NewWriter(out)
		})
	}
	return nil
}

// Write writes f to z, which must have been opened for writing first.
// Directories are stored uncompressed; files whose extension marks them
// as already compressed are stored when SelectiveCompression is on.
func (z *Zip) Write(f File) error {
	if z.zw == nil {
		return fmt.Errorf("zip archive was not created for writing first")
	}
	if f.FileInfo == nil {
		return fmt.Errorf("no file info")
	}
	if f.FileInfo.Name() == "" {
		return fmt.Errorf("missing file name")
	}
	header, err := zip.FileInfoHeader(f)
	if err != nil {
		return fmt.Errorf("%s: getting header: %v", f.Name(), err)
	}
	if f.IsDir() {
		header.Name += "/" // required - strangely no mention of this in zip spec? but is in godoc...
		header.Method = zip.Store
	} else {
		ext := strings.ToLower(path.Ext(header.Name))
		if _, ok := compressedFormats[ext]; ok && z.SelectiveCompression {
			header.Method = zip.Store
		} else {
			header.Method = uint16(z.FileMethod)
		}
	}
	writer, err := z.zw.CreateHeader(header)
	if err != nil {
		return fmt.Errorf("%s: making header: %w", f.Name(), err)
	}
	return z.writeFile(f, writer)
}

// writeFile copies f's contents into writer. Directories contribute no
// content; for symlinks the entry body is the link target path.
func (z *Zip) writeFile(f File, writer io.Writer) error {
	if f.IsDir() {
		return nil // directories have no contents
	}
	if isSymlink(f) {
		fi, ok := f.FileInfo.(FileInfo)
		if !ok {
			return fmt.Errorf("failed to cast fs.FileInfo to archiver.FileInfo: %v", f)
		}
		// file body for symlinks is the symlink target
		linkTarget, err := os.Readlink(fi.SourcePath)
		if err != nil {
			return fmt.Errorf("%s: readlink: %v", fi.SourcePath, err)
		}
		_, err = writer.Write([]byte(filepath.ToSlash(linkTarget)))
		if err != nil {
			return fmt.Errorf("%s: writing symlink target: %v", fi.SourcePath, err)
		}
		return nil
	}
	if f.ReadCloser == nil {
		return fmt.Errorf("%s: no way to read file contents", f.Name())
	}
	_, err := io.Copy(writer, f)
	if err != nil {
		return fmt.Errorf("%s: copying contents: %w", f.Name(), err)
	}
	return nil
}
// Open opens z for reading an archive from in,
// which is expected to have the given size and
// which must be an io.ReaderAt.
func (z *Zip) Open(in io.Reader, size int64) error {
	inRdrAt, ok := in.(io.ReaderAt)
	if !ok {
		return fmt.Errorf("reader must be io.ReaderAt")
	}
	if z.zr != nil {
		return fmt.Errorf("zip archive is already open for reading")
	}
	var err error
	z.zr, err = zip.NewReader(inRdrAt, size)
	if err != nil {
		return fmt.Errorf("creating reader: %v", err)
	}
	// per-reader registration of the extra decompression methods
	registerDecompressor(z.zr)
	z.ridx = 0
	return nil
}

// Read reads the next file from z, which must have
// already been opened for reading. If there are no
// more files, the error is io.EOF. The File must
// be closed when finished reading from it.
func (z *Zip) Read() (File, error) {
	if z.zr == nil {
		return File{}, fmt.Errorf("zip archive is not open")
	}
	if z.ridx >= len(z.zr.File) {
		return File{}, io.EOF
	}
	// access the file and increment counter so that
	// if there is an error processing this file, the
	// caller can still iterate to the next file
	zf := z.zr.File[z.ridx]
	z.ridx++
	file := File{
		FileInfo: zf.FileInfo(),
		Header:   zf.FileHeader,
	}
	rc, err := zf.Open()
	if err != nil {
		return file, fmt.Errorf("%s: open compressed file: %v", zf.Name, err)
	}
	file.ReadCloser = rc
	return file, nil
}

// Close closes the zip archive(s) opened by Create and Open.
// Only the writer has resources to flush; the reader is simply dropped.
func (z *Zip) Close() error {
	if z.zr != nil {
		z.zr = nil
	}
	if z.zw != nil {
		zw := z.zw
		z.zw = nil
		return zw.Close()
	}
	return nil
}
// Walk calls walkFn for each visited item in archive.
// Each entry's reader is closed before the next entry is visited;
// walkFn may return ErrStopWalk to end the walk early.
func (z *Zip) Walk(archive string, walkFn WalkFunc) error {
	zr, err := zip.OpenReader(archive)
	if err != nil {
		return fmt.Errorf("opening zip reader: %v", err)
	}
	defer zr.Close()
	registerDecompressor(&zr.Reader)
	for _, zf := range zr.File {
		zfrc, err := zf.Open()
		if err != nil {
			if zfrc != nil {
				zfrc.Close()
			}
			if z.ContinueOnError {
				log.Printf("[ERROR] Opening %s: %v", zf.Name, err)
				continue
			}
			return fmt.Errorf("opening %s: %v", zf.Name, err)
		}
		err = walkFn(File{
			FileInfo:   zf.FileInfo(),
			Header:     zf.FileHeader,
			ReadCloser: zfrc,
		})
		zfrc.Close()
		if err != nil {
			if err == ErrStopWalk {
				break
			}
			if z.ContinueOnError {
				log.Printf("[ERROR] Walking %s: %v", zf.Name, err)
				continue
			}
			return fmt.Errorf("walking %s: %v", zf.Name, err)
		}
	}
	return nil
}

// Extract extracts a single file from the zip archive.
// If the target is a directory, the entire folder will
// be extracted into destination.
func (z *Zip) Extract(source, target, destination string) error {
	// target refers to a path inside the archive, which should be clean also
	target = path.Clean(target)
	// if the target ends up being a directory, then
	// we will continue walking and extracting files
	// until we are no longer within that directory
	var targetDirPath string
	return z.Walk(source, func(f File) error {
		zfh, ok := f.Header.(zip.FileHeader)
		if !ok {
			return fmt.Errorf("expected header to be zip.FileHeader but was %T", f.Header)
		}
		// importantly, cleaning the path strips tailing slash,
		// which must be appended to folders within the archive
		name := path.Clean(zfh.Name)
		if f.IsDir() && target == name {
			targetDirPath = path.Dir(name)
		}
		if within(target, zfh.Name) {
			// either this is the exact file we want, or is
			// in the directory we want to extract

			// build the filename we will extract to
			end, err := filepath.Rel(targetDirPath, zfh.Name)
			if err != nil {
				return fmt.Errorf("relativizing paths: %v", err)
			}
			joined := filepath.Join(destination, end)
			err = z.extractFile(f, joined, &zfh)
			if err != nil {
				return fmt.Errorf("extracting file %s: %v", zfh.Name, err)
			}
			// if our target was not a directory, stop walk
			if targetDirPath == "" {
				return ErrStopWalk
			}
		} else if targetDirPath != "" {
			// finished walking the entire directory
			return ErrStopWalk
		}
		return nil
	})
}
// Match returns true if the format of file matches this
// type's format. It should not affect reader position.
func (*Zip) Match(file io.ReadSeeker) (bool, error) {
	currentPos, err := file.Seek(0, io.SeekCurrent)
	if err != nil {
		return false, err
	}
	if _, err = file.Seek(0, io.SeekStart); err != nil {
		return false, err
	}
	// restore the caller's position regardless of outcome
	defer func() {
		_, _ = file.Seek(currentPos, io.SeekStart)
	}()
	// io.ReadFull retries short reads; a bare Read may legally return
	// fewer than 4 bytes even when more are available, which would
	// misclassify a valid zip as a non-match.
	buf := make([]byte, 4)
	if _, err := io.ReadFull(file, buf); err != nil {
		// too short to be a zip (or unreadable): not a match
		return false, nil
	}
	// local file header signature "PK\x03\x04"
	return bytes.Equal(buf, []byte("PK\x03\x04")), nil
}
// String returns the format name used for registration/lookup.
func (z *Zip) String() string { return "zip" }

// NewZip returns a new, default instance ready to be customized and used.
func NewZip() *Zip {
	return &Zip{
		CompressionLevel:     flate.DefaultCompression,
		MkdirAll:             true,
		SelectiveCompression: true,
		FileMethod:           Deflate,
	}
}

// Compile-time checks to ensure type implements desired interfaces.
var (
	_ = Reader(new(Zip))
	_ = Writer(new(Zip))
	_ = Archiver(new(Zip))
	_ = Unarchiver(new(Zip))
	_ = Walker(new(Zip))
	_ = Extractor(new(Zip))
	_ = Matcher(new(Zip))
	_ = ExtensionChecker(new(Zip))
	_ = FilenameChecker(new(Zip))
)

// compressedFormats is a (non-exhaustive) set of lowercased
// file extensions for formats that are typically already
// compressed. Compressing files that are already compressed
// is inefficient, so use this set of extension to avoid that.
var compressedFormats = map[string]struct{}{
	".7z":   {},
	".avi":  {},
	".br":   {},
	".bz2":  {},
	".cab":  {},
	".docx": {},
	".gif":  {},
	".gz":   {},
	".jar":  {},
	".jpeg": {},
	".jpg":  {},
	".lz":   {},
	".lz4":  {},
	".lzma": {},
	".m4v":  {},
	".mov":  {},
	".mp3":  {},
	".mp4":  {},
	".mpeg": {},
	".mpg":  {},
	".png":  {},
	".pptx": {},
	".rar":  {},
	".sz":   {},
	".tbz2": {},
	".tgz":  {},
	".tsz":  {},
	".txz":  {},
	".xlsx": {},
	".xz":   {},
	".zip":  {},
	".zipx": {},
}

// DefaultZip is a default instance that is conveniently ready to use.
var DefaultZip = NewZip()

61
zstd.go Normal file
View file

@ -0,0 +1,61 @@
package archiver
import (
"fmt"
"io"
"path/filepath"
"github.com/klauspost/compress/zstd"
)
// Zstd facilitates Zstandard compression.
type Zstd struct {
	// Options forwarded to klauspost/compress/zstd when constructing
	// the encoder and decoder, respectively.
	EncoderOptions []zstd.EOption
	DecoderOptions []zstd.DOption
}
// Compress reads in, compresses it, and writes it to out.
// The zstd encoder's Close flushes buffered compressed data, so its
// error must be returned — deferring and discarding it (as before)
// could silently truncate the output.
func (zs *Zstd) Compress(in io.Reader, out io.Writer) error {
	w, err := zstd.NewWriter(out, zs.EncoderOptions...)
	if err != nil {
		return err
	}
	if _, err = io.Copy(w, in); err != nil {
		// best-effort close; the copy error is the primary failure
		w.Close()
		return err
	}
	return w.Close()
}
// Decompress reads in, decompresses it, and writes it to out.
func (zs *Zstd) Decompress(in io.Reader, out io.Writer) error {
	r, err := zstd.NewReader(in, zs.DecoderOptions...)
	if err != nil {
		return err
	}
	// release decoder resources when done
	defer r.Close()
	_, err = io.Copy(out, r)
	return err
}

// CheckExt ensures the file extension matches the format.
func (zs *Zstd) CheckExt(filename string) error {
	if filepath.Ext(filename) != ".zst" {
		return fmt.Errorf("filename must have a .zst extension")
	}
	return nil
}

// String returns the format name used for registration/lookup.
func (zs *Zstd) String() string { return "zstd" }

// NewZstd returns a new, default instance ready to be customized and used.
func NewZstd() *Zstd {
	return new(Zstd)
}

// Compile-time checks to ensure type implements desired interfaces.
var (
	_ = Compressor(new(Zstd))
	_ = Decompressor(new(Zstd))
)

// DefaultZstd is a default instance that is conveniently ready to use.
var DefaultZstd = NewZstd()