package tar

import "archive/tar"

Package tar 实现了对 tar 存档的访问。

磁带存档(tar)是一种用于存储文件序列的文件格式,可以以流式方式读取和写入。 本包旨在覆盖该格式的大多数变体,包括由 GNU 和 BSD tar 工具生成的格式。

Example (Minimal)
package main

import (
	"archive/tar"
	"bytes"
	"fmt"
	"io"
	"log"
	"os"
)

// main round-trips three small text files through an in-memory tar archive:
// it writes them with a tar.Writer, then walks the archive with a tar.Reader
// and prints each entry's name and body to standard output.
func main() {
	type entry struct {
		Name, Body string
	}
	entries := []entry{
		{"readme.txt", "This archive contains some text files."},
		{"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"},
		{"todo.txt", "Get animal handling license."},
	}

	// Write phase: emit one header followed by the body for every entry.
	archive := new(bytes.Buffer)
	writer := tar.NewWriter(archive)
	for i := range entries {
		e := entries[i]
		header := tar.Header{
			Name: e.Name,
			Mode: 0600,
			Size: int64(len(e.Body)),
		}
		err := writer.WriteHeader(&header)
		if err != nil {
			log.Fatal(err)
		}
		_, err = writer.Write([]byte(e.Body))
		if err != nil {
			log.Fatal(err)
		}
	}
	// The archive is only complete once the writer has been closed.
	if err := writer.Close(); err != nil {
		log.Fatal(err)
	}

	// Read phase: step through the entries until the reader reports io.EOF.
	reader := tar.NewReader(archive)
	for {
		header, err := reader.Next()
		switch {
		case err == io.EOF:
			return // End of archive
		case err != nil:
			log.Fatal(err)
		}
		fmt.Printf("Contents of %s:\n", header.Name)
		if _, err := io.Copy(os.Stdout, reader); err != nil {
			log.Fatal(err)
		}
		fmt.Println()
	}
}

Output:

Contents of readme.txt:
This archive contains some text files.
Contents of gopher.txt:
Gopher names:
George
Geoffrey
Gonzo
Contents of todo.txt:
Get animal handling license.

Index

Examples

Constants

// Type flags for Header.Typeflag.
const (
	// Type '0' indicates a regular file.
	TypeReg = '0'

	// Deprecated: Use TypeReg instead.
	TypeRegA = '\x00'

	// Type '1' to '6' are header-only flags and may not have a data body.
	TypeLink    = '1' // Hard link
	TypeSymlink = '2' // Symbolic link
	TypeChar    = '3' // Character device node
	TypeBlock   = '4' // Block device node
	TypeDir     = '5' // Directory
	TypeFifo    = '6' // FIFO node

	// Type '7' is reserved.
	TypeCont = '7'

	// Type 'x' is used by the PAX format to store key-value records that
	// are only relevant to the next file.
	// This package transparently handles these types.
	TypeXHeader = 'x'

	// Type 'g' is used by the PAX format to store key-value records that
	// are relevant to all subsequent files.
	// This package only supports parsing and composing such headers,
	// but does not currently support persisting the global state across files.
	TypeXGlobalHeader = 'g'

	// Type 'S' indicates a sparse file in the GNU format.
	TypeGNUSparse = 'S'

	// Types 'L' and 'K' are used by the GNU format for a meta file
	// that stores the path or link name for the next file.
	// This package transparently handles these types.
	TypeGNULongName = 'L'
	TypeGNULongLink = 'K'
)

Header.Typeflag 的类型标志。

Variables

// Sentinel errors exposed by this package; every message carries the
// "archive/tar: " prefix.
//
//   - ErrWriteTooLong is returned by Writer.Write when more than Header.Size
//     bytes are written for the current file.
//   - ErrInsecurePath is returned by Reader.Next, together with the header,
//     for a non-local name when GODEBUG contains tarinsecurepath=0.
var (
	ErrHeader          = errors.New("archive/tar: invalid tar header")
	ErrWriteTooLong    = errors.New("archive/tar: write too long")
	ErrFieldTooLong    = errors.New("archive/tar: header field too long")
	ErrWriteAfterClose = errors.New("archive/tar: write after close")
	ErrInsecurePath    = errors.New("archive/tar: insecure file path")
)

Types

type FileInfoNames

// FileInfoNames extends fs.FileInfo with methods that report the owner's
// user and group names directly.
type FileInfoNames interface {
	fs.FileInfo
	// Uname should return the user name.
	Uname() (string, error)
	// Gname should return the group name.
	Gname() (string, error)
}

FileInfoNames 扩展了 fs.FileInfo。 将其实例传递给 FileInfoHeader 允许调用者通过直接指定 Uname 和 Gname 来避免系统相关的名称查找。

type Format

// Format represents the tar archive format.
type Format int

Format represents the tar archive format.

The original tar format was introduced in Unix V7. Since then, there have been multiple competing formats attempting to standardize or extend the V7 format to overcome its limitations. The most common formats are the USTAR, PAX, and GNU formats, each with their own advantages and limitations.

The following table captures the capabilities of each format:

                  |  USTAR |       PAX |       GNU
------------------+--------+-----------+----------
Name              |   256B | unlimited | unlimited
Linkname          |   100B | unlimited | unlimited
Size              | uint33 | unlimited |    uint89
Mode              | uint21 |    uint21 |    uint57
Uid/Gid           | uint21 | unlimited |    uint57
Uname/Gname       |    32B | unlimited |       32B
ModTime           | uint33 | unlimited |     int89
AccessTime        |    n/a | unlimited |     int89
ChangeTime        |    n/a | unlimited |     int89
Devmajor/Devminor | uint21 |    uint21 |    uint57
------------------+--------+-----------+----------
string encoding   |  ASCII |     UTF-8 |    binary
sub-second times  |     no |       yes |        no
sparse files      |     no |       yes |       yes

The table's upper portion shows the Header fields, where each format reports the maximum number of bytes allowed for each string field and the integer type used to store each numeric field (where timestamps are stored as the number of seconds since the Unix epoch).

The table's lower portion shows specialized features of each format, such as supported string encodings, support for sub-second timestamps, or support for sparse files.

The Writer currently provides no support for sparse files.

// Constants to identify various tar formats.
const (

	// FormatUnknown indicates that the format is unknown.
	//
	// NOTE(review): no initializer is visible for this first constant; the
	// listing appears to elide part of the declaration — confirm the actual
	// values against the package source.
	FormatUnknown Format

	// FormatUSTAR represents the USTAR header format defined in POSIX.1-1988.
	//
	// While this format is compatible with most tar readers,
	// the format has several limitations making it unsuitable for some usages.
	// Most notably, it cannot support sparse files, files larger than 8GiB,
	// filenames larger than 256 characters, and non-ASCII filenames.
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
	FormatUSTAR

	// FormatPAX represents the PAX header format defined in POSIX.1-2001.
	//
	// PAX extends USTAR by writing a special file with Typeflag TypeXHeader
	// preceding the original header. This file contains a set of key-value
	// records, which are used to overcome USTAR's shortcomings, in addition to
	// providing the ability to have sub-second resolution for timestamps.
	//
	// Some newer formats add their own extensions to PAX by defining their
	// own keys and assigning certain semantic meaning to the associated values.
	// For example, sparse file support in PAX is implemented using keys
	// defined by the GNU manual (e.g., "GNU.sparse.map").
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html
	FormatPAX

	// FormatGNU represents the GNU header format.
	//
	// The GNU header format is older than the USTAR and PAX standards and
	// is not compatible with them. The GNU format supports
	// arbitrary file sizes, filenames of arbitrary encoding and length,
	// sparse files, and other features.
	//
	// It is recommended that PAX be chosen over GNU unless the target
	// application can only parse GNU formatted archives.
	//
	// Reference:
	//	https://www.gnu.org/software/tar/manual/html_node/Standard.html
	FormatGNU
)

Constants to identify various tar formats.

func (Format) String

func (f Format) String() string

type Header

// Header represents a single header in a tar archive.
// Some fields may not be populated.
type Header struct {
	// Typeflag is the type of header entry.
	// The zero value is automatically promoted to either TypeReg or TypeDir
	// depending on the presence of a trailing slash in Name.
	Typeflag byte

	Name     string // Name of file entry
	Linkname string // Target name of link (valid for TypeLink or TypeSymlink)

	Size  int64  // Logical file size in bytes
	Mode  int64  // Permission and mode bits
	Uid   int    // User ID of owner
	Gid   int    // Group ID of owner
	Uname string // User name of owner
	Gname string // Group name of owner

	// If the Format is unspecified, then Writer.WriteHeader rounds ModTime
	// to the nearest second and ignores the AccessTime and ChangeTime fields.
	//
	// To use AccessTime or ChangeTime, specify the Format as PAX or GNU.
	// To use sub-second resolution, specify the Format as PAX.
	ModTime    time.Time // Modification time
	AccessTime time.Time // Access time (requires either PAX or GNU support)
	ChangeTime time.Time // Change time (requires either PAX or GNU support)

	Devmajor int64 // Major device number (valid for TypeChar or TypeBlock)
	Devminor int64 // Minor device number (valid for TypeChar or TypeBlock)

	// Xattrs stores extended attributes as PAX records under the
	// "SCHILY.xattr." namespace.
	//
	// The following are semantically equivalent:
	//  h.Xattrs[key] = value
	//  h.PAXRecords["SCHILY.xattr."+key] = value
	//
	// When Writer.WriteHeader is called, the contents of Xattrs will take
	// precedence over those in PAXRecords.
	//
	// Deprecated: Use PAXRecords instead.
	Xattrs map[string]string

	// PAXRecords is a map of PAX extended header records.
	//
	// User-defined records should have keys of the following form:
	//	VENDOR.keyword
	// Where VENDOR is some namespace in all uppercase, and keyword may
	// not contain the '=' character (e.g., "GOLANG.pkg.version").
	// The key and value should be non-empty UTF-8 strings.
	//
	// When Writer.WriteHeader is called, PAX records derived from the
	// other fields in Header take precedence over PAXRecords.
	PAXRecords map[string]string

	// Format specifies the format of the tar header.
	//
	// This is set by Reader.Next as a best-effort guess at the format.
	// Since the Reader liberally reads some non-compliant files,
	// it is possible for this to be FormatUnknown.
	//
	// If the format is unspecified when Writer.WriteHeader is called,
	// then it uses the first format (in the order of USTAR, PAX, GNU)
	// capable of encoding this Header (see Format).
	Format Format
}

Header 表示 tar 存档中的单个头部。 某些字段可能未填充。

为了向前兼容,如果用户从 Reader.Next 获取 Header、以某种方式修改它、然后再将其传回 Writer.WriteHeader,则应创建一个新的 Header,并只复制希望保留的字段。

func FileInfoHeader

func FileInfoHeader(fi fs.FileInfo, link string) (*Header, error)

FileInfoHeader 从 fi 创建一个部分填充的 Header。 如果 fi 描述一个符号链接,FileInfoHeader 将 link 记录为链接目标。 如果 fi 描述一个目录,则在名称后附加斜杠。

由于 fs.FileInfo 的 Name 方法仅返回它所描述文件的基本名称, 可能需要修改 Header.Name 以提供文件的完整路径名。

如果 fi 实现了 FileInfoNames,则 Header.Gname 和 Header.Uname 由该接口的方法提供。

func (*Header) FileInfo

func (h *Header) FileInfo() fs.FileInfo

FileInfo 返回 Header 的 fs.FileInfo。

type Reader

// Reader provides sequential access to the contents of a tar archive.
// Reader.Next advances to the next file in the archive (including the first),
// after which the Reader can be treated as an io.Reader for that file's data.
type Reader struct {
	// contains filtered or unexported fields
}

Reader provides sequential access to the contents of a tar archive. Reader.Next advances to the next file in the archive (including the first), and then Reader can be treated as an io.Reader to access the file's data.

func NewReader

func NewReader(r io.Reader) *Reader

NewReader creates a new Reader reading from r.

func (*Reader) Next

func (tr *Reader) Next() (*Header, error)

Next advances to the next entry in the tar archive. The Header.Size determines how many bytes can be read for the next file. Any remaining data in the current file is automatically discarded. At the end of the archive, Next returns the error io.EOF.

If Next encounters a non-local name (as defined by filepath.IsLocal) and the GODEBUG environment variable contains `tarinsecurepath=0`, Next returns the header with an ErrInsecurePath error. A future version of Go may introduce this behavior by default. Programs that want to accept non-local names can ignore the ErrInsecurePath error and use the returned header.

func (*Reader) Read

func (tr *Reader) Read(b []byte) (int, error)

Read reads from the current file in the tar archive. It returns (0, io.EOF) when it reaches the end of that file, until Next is called to advance to the next file.

If the current file is sparse, then the regions marked as a hole are read back as NUL-bytes.

Calling Read on special types like TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what the Header.Size claims.

type Writer

// Writer provides sequential writing of a tar archive.
// Writer.WriteHeader begins a new file with the provided Header, after which
// the Writer can be treated as an io.Writer to supply that file's data.
type Writer struct {
	// contains filtered or unexported fields
}

Writer provides sequential writing of a tar archive. Writer.WriteHeader begins a new file with the provided Header, and then Writer can be treated as an io.Writer to supply that file's data.

func NewWriter

func NewWriter(w io.Writer) *Writer

NewWriter creates a new Writer writing to w.

func (*Writer) AddFS

func (tw *Writer) AddFS(fsys fs.FS) error

AddFS adds the files from fs.FS to the archive. It walks the directory tree starting at the root of the filesystem adding each file to the tar archive while maintaining the directory structure.

func (*Writer) Close

func (tw *Writer) Close() error

Close closes the tar archive by flushing the padding, and writing the footer. If the current file (from a prior call to Writer.WriteHeader) is not fully written, then this returns an error.

func (*Writer) Flush

func (tw *Writer) Flush() error

Flush finishes writing the current file's block padding. The current file must be fully written before Flush can be called.

This is unnecessary as the next call to Writer.WriteHeader or Writer.Close will implicitly flush out the file's padding.

func (*Writer) Write

func (tw *Writer) Write(b []byte) (int, error)

Write writes to the current file in the tar archive. Write returns the error ErrWriteTooLong if more than Header.Size bytes are written after Writer.WriteHeader.

Calling Write on special types like TypeLink, TypeSymlink, TypeChar, TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless of what the Header.Size claims.

func (*Writer) WriteHeader

func (tw *Writer) WriteHeader(hdr *Header) error

WriteHeader writes hdr and prepares to accept the file's contents. The Header.Size determines how many bytes can be written for the next file. If the current file is not fully written, then this returns an error. This implicitly flushes any padding necessary before writing the header.