-
Notifications
You must be signed in to change notification settings - Fork 0
/
reader.go
114 lines (102 loc) · 3 KB
/
reader.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
package pen
import (
"errors"
"io"
"os"
)
// Sentinel errors exposed by this package.
var (
	// EBADSLT reports a checksum mismatch; ScanFromReader treats it as a
	// corrupted entry and skips ahead instead of aborting.
	EBADSLT = errors.New("checksum mismatch")

	// EINVAL reports an invalid argument; NewReader and NewReaderFromFile
	// return it for a non-zero blockSize smaller than 16.
	EINVAL = errors.New("invalid argument")
)
// Reader pairs an open file with the block size that its Scan and Read
// methods pass on to ScanFromReader and ReadFromReader.
type Reader struct {
// file is the handle all reads are issued against; Close closes it.
file *os.File
// blockSize is forwarded unchanged to the package-level read helpers.
// The constructors in this file never store a value below 16.
blockSize int
}
// NewReader opens filename read-only and returns a Reader for it. The Reader
// is just a convenience wrapper around ReadFromReader and ScanFromReader, and
// it is *safe* to use it concurrently.
//
// Example usage:
//
//	r, err := NewReader(filename, 4096)
//	if err != nil {
//		panic(err)
//	}
//
//	// read a specific offset
//	data, _, err := r.Read(docID)
//	if err != nil {
//		panic(err)
//	}
//
//	// scan from a specific offset
//	err = r.Scan(0, func(data []byte, offset, next uint32) error {
//		log.Printf("%v", data)
//		return nil
//	})
//
// Each Read requires 2 syscalls: one to read the header and one to read the
// data (the length of the data is stored in the header). This drops to 1
// syscall when the data fits within a single block.
//
// The header is 16 bytes long, so a non-zero blockSize below 16 is rejected
// with EINVAL before the file is opened. A blockSize of 0 means 16.
func NewReader(filename string, blockSize int) (*Reader, error) {
	switch {
	case blockSize == 0:
		blockSize = 16
	case blockSize < 16:
		return nil, EINVAL
	}

	f, err := os.OpenFile(filename, os.O_RDONLY, 0600)
	if err != nil {
		return nil, err
	}
	return NewReaderFromFile(f, blockSize)
}
// NewReaderFromFile wraps an already opened file in a Reader.
//
// A blockSize of 0 selects the default of 16; any other value below 16 is
// rejected with EINVAL. The file is stored as-is and is closed by
// Reader.Close.
func NewReaderFromFile(fd *os.File, blockSize int) (*Reader, error) {
	if blockSize != 0 && blockSize < 16 {
		return nil, EINVAL
	}
	if blockSize == 0 {
		blockSize = 16
	}

	r := new(Reader)
	r.file = fd
	r.blockSize = blockSize
	return r, nil
}
// Scan walks the open file starting at offset by delegating to
// ScanFromReader with the Reader's file and block size. An error returned by
// cb is returned as the Scan error.
func (ar *Reader) Scan(offset uint32, cb func([]byte, uint32, uint32) error) error {
	src, size := ar.file, ar.blockSize
	return ScanFromReader(src, offset, size, cb)
}
// Read fetches the document at offset by delegating to ReadFromReader with
// the Reader's file and block size. It returns the data, the next readable
// offset, and an error.
func (ar *Reader) Read(offset uint32) ([]byte, uint32, error) {
	src, size := ar.file, ar.blockSize
	return ReadFromReader(src, offset, size)
}
// Close closes the underlying file and returns the error, if any, reported
// by that close.
func (ar *Reader) Close() error {
	f := ar.file
	return f.Close()
}
// Reads specific offset. returns data, nextOffset, error. You can
// ReadFromReader(nextOffset) if you want to read the next document, or
// use the Scan() helper
func ReadFromReader(reader io.ReaderAt, offset uint32, blockSize int) ([]byte, uint32, error) {
b, err := ReadFromReader64(reader, uint64(offset*PAD), blockSize)
if err != nil {
return nil, 0, err
}
nextOffset := (offset + ((uint32(16+len(b)))+PAD-1)/PAD)
return b, uint32(nextOffset), nil
}
// ScanFromReader reads consecutive documents from reader, starting at offset,
// and passes each one to cb together with its own offset and the offset of
// the next document.
//
// Scanning ends with a nil error when ReadFromReader reports io.EOF. A
// checksum mismatch (EBADSLT) is assumed to mean a corrupted entry: the scan
// advances by one offset unit and retries until it finds the next valid
// entry. Any other read error, and any error returned by cb, stops the scan
// and is returned as-is.
func ScanFromReader(reader io.ReaderAt, offset uint32, blockSize int, cb func([]byte, uint32, uint32) error) error {
	for {
		data, next, err := ReadFromReader(reader, offset, blockSize)
		// errors.Is rather than == so that sentinels wrapped with extra
		// context by a lower layer are still recognised.
		switch {
		case errors.Is(err, io.EOF):
			return nil
		case errors.Is(err, EBADSLT):
			// Assume a corrupted file: skip ahead until the next valid entry.
			offset++
			continue
		case err != nil:
			return err
		}

		if err := cb(data, offset, next); err != nil {
			return err
		}
		offset = next
	}
}