-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrepurl.go
79 lines (69 loc) · 1.88 KB
/
grepurl.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
package main
import (
"fmt"
"log"
"os"
"strconv"
"github.com/danohuiginn/grepurl/grepurl"
"github.com/urfave/cli"
)
// NotImplemented is a placeholder command action. It prints a fixed notice
// to standard output and returns nil; the context argument is not used.
func NotImplemented(c *cli.Context) error {
	const notice = "Not implemented yet"
	fmt.Println(notice)
	return nil
}
// Sample is the action for the "sample" command. It imports
// /tmp/urlsample.txt through grepurl.RunImport, starts grepurl.RunQuery for
// the pattern ".*.gov" in its own goroutine, and prints every string received
// on the result channel until that channel is closed. It always returns nil.
// The context argument is not used.
func Sample(cli *cli.Context) error {
	store, index := grepurl.RunImport([]string{"/tmp/urlsample.txt"})
	fmt.Println("import complete")

	pattern := ".*.gov"
	results := make(chan string)
	go grepurl.RunQuery(pattern, index, store, results)

	// Drain the channel; the loop ends only once the channel is closed.
	// NOTE(review): presumably RunQuery closes the channel when it finishes —
	// confirm, otherwise this loop never terminates.
	for {
		hit, open := <-results
		if !open {
			break
		}
		fmt.Println(hit)
	}
	return nil
}
// Download is the action for the "download" command. It logs a start
// message, asks grepurl.ListIndexFiles for the index files of "2016-05",
// passes the first one to grepurl.UploadURLs, and logs the line count that
// call returns.
//
// It returns an error when no index files are listed, instead of panicking
// on an out-of-range index. The context argument is not used.
func Download(cli *cli.Context) error {
	// Disabled code that shelled out to the aws CLI to copy a single crawl
	// archive; kept for reference.
	/*cmd := exec.Command("/home/dan/.virtualenvs/yl/bin/aws", "s3", "cp", "s3://commoncrawl/crawl-data/CC-MAIN-2016-22/segments/1464049270134.8/wat/CC-MAIN-20160524002110-00000-ip-10-185-217-139.ec2.internal.warc.wat.gz", "/tmp/mydl.gz")
	output, err := cmd.CombinedOutput()
	log.Println(strings.Join(cmd.Args, " "))
	log.Println(err)
	log.Println(string(output))
	return nil
	fn := "crawl-data/CC-MAIN-2016-22/segments/1464049270134.8/wat/CC-MAIN-20160524002110-00000-ip-10-185-217-139.ec2.internal.warc.wat.gz"
	fn = "/tmp/unzip"*/
	const crawl = "2016-05"

	log.Println("starting upload")
	indices := grepurl.ListIndexFiles(crawl)
	// Guard the indices[0] access below: an empty listing would otherwise
	// crash the program with an index-out-of-range panic.
	if len(indices) == 0 {
		return fmt.Errorf("no index files listed for %q", crawl)
	}

	// NOTE(review): the meaning of the boolean argument is not visible from
	// this file — confirm against grepurl.UploadURLs.
	linecount := grepurl.UploadURLs(indices[0], true)
	log.Println("uploaded " + strconv.Itoa(linecount) + " lines.")
	return nil
}
// main builds the grepurl command-line application — a global "datadir"
// string flag plus the "search", "sample" and "download" commands — and runs
// it with the process arguments. If the run returns an error, the error is
// logged and the process exits with a non-zero status.
func main() {
	app := cli.NewApp()
	app.Name = "grepurl"
	app.Flags = []cli.Flag{
		cli.StringFlag{
			Name:  "datadir",
			Value: "/tmp/grepurl/",
			Usage: "Directory for grepurl to keep its data. Should have plenty of space, and ideally be accessible from multiple machines",
		},
	}
	app.Commands = []cli.Command{
		{
			Name:   "search",
			Usage:  "look for an url",
			Action: NotImplemented,
		},
		{
			Name:   "sample",
			Usage:  "give it a spin",
			Action: Sample,
		},
		{
			Name:   "download",
			Usage:  "download some crawl archives",
			Action: Download,
		},
	}

	// Do not discard the result of Run: it carries errors returned by the
	// command actions as well as argument-parsing failures.
	if err := app.Run(os.Args); err != nil {
		log.Fatal(err)
	}
}