-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathreader.go
121 lines (97 loc) · 1.95 KB
/
reader.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
/*
* Author Linvon
*/
package main
import (
"bufio"
"compress/gzip"
"fmt"
"io"
"math"
"os"
"strings"
"sync"
)
// getFile opens fileName for reading and reports whether the name ends in
// ".gz". The suffix test looks only at the name, never at the file contents,
// and is evaluated even when the open fails, so isGz can be true alongside a
// non-nil err (in which case f is nil). Nothing in here closes the returned
// file.
func getFile(fileName string) (f *os.File, isGz bool, err error) {
	isGz = strings.HasSuffix(fileName, ".gz")
	f, err = os.Open(fileName)
	return f, isGz, err
}
// process streams f in chunks of up to 250 KiB and hands each chunk to
// processChunk on its own goroutine. Every chunk is extended up to the next
// newline so that a chunk boundary does not fall in the middle of a line.
// When isGz is true the content of f is gunzipped on the fly.
//
// It returns nil once the input is exhausted, or the first error hit while
// opening the gzip stream or reading from it. In every case it returns only
// after all chunks dispatched so far have been processed.
func process(f *os.File, isGz bool) error {
	// Scratch buffers for raw chunks; processChunk puts them back when done.
	linesPool := sync.Pool{New: func() interface{} {
		return make([]byte, 250*1024)
	}}
	// Only passed through: processChunk's signature asks for it.
	stringPool := sync.Pool{New: func() interface{} {
		return ""
	}}

	var r *bufio.Reader
	if isGz {
		fz, err := gzip.NewReader(f)
		if err != nil {
			return err
		}
		// Closing the gzip reader does not close f itself.
		defer fz.Close()
		r = bufio.NewReader(fz) // read the decompressed stream
	} else {
		r = bufio.NewReader(f) // read the file as-is
	}

	var wg sync.WaitGroup
	// Deferred last so it runs first: no return path may leave workers behind.
	defer wg.Wait()

	for {
		buf := linesPool.Get().([]byte)
		// A recycled buffer comes back at the length of the chunk it last
		// held; stretch it to full capacity again so reads do not shrink.
		buf = buf[:cap(buf)]

		n, err := r.Read(buf)
		if n == 0 {
			linesPool.Put(buf)
			if err == io.EOF {
				return nil
			}
			if err != nil {
				return fmt.Errorf("reading chunk: %w", err)
			}
			// (0, nil) means "nothing happened", not end of input: retry.
			continue
		}
		buf = buf[:n]

		if err == nil {
			// Complete the last line of the chunk. ReadBytes returns whatever
			// it read even when it fails, so the bytes are kept in all cases;
			// otherwise a final line without a trailing newline would be lost.
			var rest []byte
			rest, err = r.ReadBytes('\n')
			buf = append(buf, rest...)
		}

		wg.Add(1)
		go func(chunk []byte) {
			defer wg.Done()
			processChunk(chunk, &linesPool, &stringPool)
		}(buf)

		if err == io.EOF {
			return nil
		}
		if err != nil {
			return fmt.Errorf("reading chunk: %w", err)
		}
	}
}
// processChunk splits chunk on "\n" and passes every non-empty line to
// lineProcess. The lines are divided into batches of up to 300, each batch
// handled by its own goroutine, so lineProcess is invoked from several
// goroutines at once. The function returns once every batch has finished.
//
// chunk is copied into a string and then put back into linesPool, so the
// caller must not use it after this call. stringPool is not used: the earlier
// Get/Put round trip on it never reused a value and only cost an allocation
// per call. The parameter is kept so existing callers keep compiling.
func processChunk(chunk []byte, linesPool *sync.Pool, stringPool *sync.Pool) {
	const linesPerWorker = 300

	// string(chunk) copies the bytes, so the buffer can be recycled right away.
	logs := string(chunk)
	linesPool.Put(chunk)

	logsSlice := strings.Split(logs, "\n")
	total := len(logsSlice)

	var wg sync.WaitGroup
	for start := 0; start < total; start += linesPerWorker {
		// The last batch may be shorter than linesPerWorker.
		end := int(math.Min(float64(start+linesPerWorker), float64(total)))

		wg.Add(1)
		go func(batch []string) {
			defer wg.Done() // always signal completion so wg.Wait cannot hang
			for _, text := range batch {
				if len(text) == 0 {
					continue
				}
				lineProcess(text)
			}
		}(logsSlice[start:end])
	}
	wg.Wait()
}