-
Notifications
You must be signed in to change notification settings - Fork 0
/
tar.go
236 lines (194 loc) · 5.8 KB
/
tar.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
package compressor
import (
"archive/tar"
"context"
"errors"
"fmt"
"io"
"io/fs"
"path"
"strings"
"github.com/pchchv/golog"
)
// Tar is the tar archive format. Its methods in this file create tar archives
// (Archive, ArchiveAsync), append to existing ones (Insert), and read them
// (Extract).
type Tar struct {
// If true, errors that occurred while reading or writing a file in the archive
// will be logged and the operation will continue for the remaining files.
// Errors caused by the context being done are never skipped this way.
ContinueOnError bool
}
// Interface guards: compile-time assertions that *Tar satisfies the Archiver,
// Extractor, and Inserter interfaces. The blank identifiers mean nothing is
// kept at runtime; the build simply fails if a method set stops matching.
var (
_ Archiver = (*Tar)(nil)
_ Extractor = (*Tar)(nil)
_ Inserter = (*Tar)(nil)
)
// init passes a zero-value Tar to RegisterFormat when the package is
// initialized.
func init() {
RegisterFormat(Tar{})
}
// Name returns the name of the format, ".tar". Match uses this same string
// when checking whether a filename refers to a tar archive.
func (Tar) Name() string {
return ".tar"
}
// Match reports whether the given filename and/or stream look like a tar
// archive.
//
// ByName is set when the lowercased filename contains the format name
// anywhere in it (not only as a suffix). ByStream is set when a first tar
// header can be read from stream without error; doing so consumes bytes from
// stream. The returned error is always nil: a failure to read a header simply
// means the stream did not match.
func (t Tar) Match(filename string, stream io.Reader) (MatchResult, error) {
	// name check: case-insensitive substring search for the format name
	nameMatches := strings.Contains(strings.ToLower(filename), t.Name())

	// stream check: try to parse the first header of the archive
	_, headerErr := tar.NewReader(stream).Next()

	result := MatchResult{
		ByName:   nameMatches,
		ByStream: headerErr == nil,
	}
	return result, nil
}
// Archive writes the given files to output as a tar archive, in slice order.
//
// If writing a file fails and ContinueOnError is set (and the context is not
// done), the error is logged and the remaining files are still attempted;
// otherwise the first error is returned. Context errors always abort.
//
// The tar writer is closed before returning. Closing flushes pending data and
// writes the end-of-archive trailer, so a failure there means the archive is
// incomplete; that error is returned unless an earlier error is already being
// reported.
func (t Tar) Archive(ctx context.Context, output io.Writer, files []File) (retErr error) {
	tw := tar.NewWriter(output)
	defer func() {
		// do not mask the error that caused the early return, if any
		if closeErr := tw.Close(); closeErr != nil && retErr == nil {
			retErr = fmt.Errorf("closing tar writer: %w", closeErr)
		}
	}()

	for _, file := range files {
		err := t.writeFileToArchive(ctx, tw, file)
		if err == nil {
			continue
		}
		if t.ContinueOnError && ctx.Err() == nil { // context errors should always abort
			golog.Info("[ERROR] %v", err)
			continue
		}
		return err
	}

	return nil
}
// ArchiveAsync writes files received from the files channel to output as a
// tar archive, in the order they arrive. It returns once the channel has been
// closed and drained, or as soon as a file fails with an error that is not
// skipped.
//
// If writing a file fails and ContinueOnError is set (and the context is not
// done), the error is logged and the next file is awaited; otherwise the
// error is returned immediately, leaving any remaining channel values unread.
// Context errors always abort.
//
// The tar writer is closed before returning. Closing flushes pending data and
// writes the end-of-archive trailer, so a failure there means the archive is
// incomplete; that error is returned unless an earlier error is already being
// reported.
func (t Tar) ArchiveAsync(ctx context.Context, output io.Writer, files <-chan File) (retErr error) {
	tw := tar.NewWriter(output)
	defer func() {
		// do not mask the error that caused the early return, if any
		if closeErr := tw.Close(); closeErr != nil && retErr == nil {
			retErr = fmt.Errorf("closing tar writer: %w", closeErr)
		}
	}()

	for file := range files {
		err := t.writeFileToArchive(ctx, tw, file)
		if err == nil {
			continue
		}
		if t.ContinueOnError && ctx.Err() == nil { // context errors should always abort
			golog.Info("[ERROR] %v", err)
			continue
		}
		return err
	}

	return nil
}
// Insert appends files to the existing tar archive in into.
//
// The archive is first scanned from the current position of into to locate
// the end of the last entry's content; the new entries are then written
// starting at the next 512-byte block boundary, overwriting the old
// end-of-archive padding. A fresh trailer is written when the tar writer is
// closed on return.
//
// If appending a file fails and ContinueOnError is set (and the context is
// not done), the error is logged and the remaining files are still attempted;
// otherwise the error is returned, wrapped with the file's index and name.
// Errors from scanning or seeking the existing archive are always returned.
// A failure to close the tar writer is returned unless an earlier error is
// already being reported.
func (t Tar) Insert(ctx context.Context, into io.ReadWriteSeeker, files []File) (retErr error) {
	// Tar files may end with some, none, or a lot of zero-byte padding.
	// According to the specification it should end with two 512-byte trailer
	// records consisting solely of null/0 bytes. However, this is not always
	// the case.
	// It looks like the only reliable solution is to scan the entire archive
	// to find the last file, read its size, then use that to calculate the end
	// of content and thus the true length of the end-of-archive padding.
	// This is a bit more complicated than just adding the size of the last
	// file to the current stream/seek position, because we have to accurately
	// align to the 512-byte blocks.
	// Another option is to scan the file for the last continuous series of 0s,
	// without interpreting the tar format at all, and find the nearest block
	// size offset and start writing there.
	// The problem is that you won't know if you've overwritten part of the
	// last file if it ends with all 0s.
	const blockSize = 512 // (as of Go 1.17, this is also a hard-coded const in the archive/tar package)

	// lastStreamPos is the stream offset just past the last entry's header,
	// i.e. where its content begins; both stay 0 for an archive with no entries.
	var lastFileSize, lastStreamPos int64

	tr := tar.NewReader(into)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		lastStreamPos, err = into.Seek(0, io.SeekCurrent)
		if err != nil {
			return err
		}
		lastFileSize = hdr.Size
	}

	// Now calculate the exact location for writing the new file: the end of
	// the last entry's content, rounded UP to a block boundary. An offset that
	// is already aligned (empty archive, directory or empty file as the last
	// entry, or content that is an exact multiple of the block size) must be
	// left alone. Adding a whole extra block there would leave a lone zero
	// block in front of the new header, and readers treat a zero block
	// followed by a non-zero block as a corrupt archive.
	newOffset := lastStreamPos + lastFileSize
	if rem := newOffset % blockSize; rem != 0 {
		newOffset += blockSize - rem
	}
	if _, err := into.Seek(newOffset, io.SeekStart); err != nil {
		return err
	}

	tw := tar.NewWriter(into)
	defer func() {
		// Close writes the new end-of-archive trailer; report its failure
		// unless it would mask the error that caused the early return.
		if closeErr := tw.Close(); closeErr != nil && retErr == nil {
			retErr = fmt.Errorf("closing tar writer: %w", closeErr)
		}
	}()

	for i, file := range files {
		if err := ctx.Err(); err != nil {
			return err
		}
		err := t.writeFileToArchive(ctx, tw, file)
		if err == nil {
			continue
		}
		if t.ContinueOnError && ctx.Err() == nil {
			golog.Info("[ERROR] appending file %d into archive: %s: %v", i, file.Name(), err)
			continue
		}
		return fmt.Errorf("appending file %d into archive: %s: %w", i, file.Name(), err)
	}

	return nil
}
// Extract reads the tar archive from sourceArchive and calls handleFile for
// each selected entry, in archive order.
//
// An entry is handed to handleFile only if fileIsIncluded accepts its name
// against pathsInArchive, it is not covered by a previously skipped
// directory, and it is not a pax global header. The File passed to handleFile
// reads its content straight from the shared tar reader, so the content has
// to be consumed before Extract advances to the next entry.
//
// If handleFile returns fs.SkipDir, later entries under that directory (or,
// for a non-directory entry, under its parent directory) are skipped. Any
// other handleFile error aborts extraction and is returned wrapped with the
// entry name.
//
// The context is checked before each entry. An error from advancing the
// reader is returned, unless ContinueOnError is set and the context is not
// done: then the error is logged and extraction either moves on (when the
// reader still produced a header, so the problem is limited to that entry)
// or ends successfully with what has been extracted so far (when the reader
// cannot advance any further).
func (t Tar) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error {
	tr := tar.NewReader(sourceArchive)

	// important to initialize to non-nil, empty value due to how fileIsIncluded works
	skipDirs := skipList{}

	for {
		if err := ctx.Err(); err != nil {
			return err
		}

		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			if t.ContinueOnError && ctx.Err() == nil {
				golog.Info("[ERROR] Advancing to next file in tar archive: %v", err)
				if hdr != nil {
					// A header returned together with an error means the
					// reader itself is still usable; skip just this entry.
					continue
				}
				// A failed tar.Reader keeps returning the same error from
				// every later Next call, so retrying would loop forever.
				// Nothing more can be read: stop here.
				break
			}
			return err
		}

		if !fileIsIncluded(pathsInArchive, hdr.Name) {
			continue
		}
		if fileIsIncluded(skipDirs, hdr.Name) {
			continue
		}
		if hdr.Typeflag == tar.TypeXGlobalHeader {
			// ignore the pax global header from git-generated tarballs
			continue
		}

		file := File{
			FileInfo:   hdr.FileInfo(),
			Header:     hdr,
			FileName:   hdr.Name,
			LinkTarget: hdr.Linkname,
			// the tar reader is positioned at this entry's content; there is
			// nothing to release, hence the no-op closer
			Open: func() (io.ReadCloser, error) { return io.NopCloser(tr), nil },
		}

		err = handleFile(ctx, file)
		if errors.Is(err, fs.SkipDir) {
			// if a directory, skip this path
			// if a file, skip the folder path
			dirPath := hdr.Name
			if hdr.Typeflag != tar.TypeDir {
				dirPath = path.Dir(hdr.Name) + "/"
			}
			skipDirs.add(dirPath)
		} else if err != nil {
			return fmt.Errorf("handling file: %s: %w", hdr.Name, err)
		}
	}

	return nil
}
// writeFileToArchive writes a single entry for file to tw: its header, and
// for regular files also its content.
//
// It returns the context's error if ctx is already done before anything is
// written. Failures while building the header, writing the header, or copying
// the content are returned wrapped with the file name and the failing step.
func (Tar) writeFileToArchive(ctx context.Context, tw *tar.Writer, file File) error {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return ctxErr
	}

	header, headerErr := tar.FileInfoHeader(file, file.LinkTarget)
	if headerErr != nil {
		return fmt.Errorf("file %s: creating header: %w", file.FileName, headerErr)
	}
	// FileInfoHeader() only fills in the base name; store the complete path
	header.Name = file.FileName

	if writeErr := tw.WriteHeader(header); writeErr != nil {
		return fmt.Errorf("file %s: writing header: %w", file.FileName, writeErr)
	}

	// only regular files carry a body in the archive
	// (directories and links consist of the header alone)
	if header.Typeflag == tar.TypeReg {
		if copyErr := openAndCopyFile(file, tw); copyErr != nil {
			return fmt.Errorf("file %s: writing data: %w", file.FileName, copyErr)
		}
	}

	return nil
}