forked from mlwmlw/pcc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunit.ls
138 lines (125 loc) · 3.25 KB
/
unit.ls
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
require! <[http querystring request cheerio q string moment mongodb]>
# Root listing page of the unit tree. `main` starts crawling here and also
# uses it (minus the file name) as the prefix for the hrefs it finds.
base = \http://web.pcc.gov.tw/tps/main/pss/pblm/tender/basic/search/mainListCommon.jsp
# `request.defaults` does not modify `request` in place; it returns a new
# wrapper carrying the defaults. Keep that wrapper, otherwise the socket
# pool limit below is never applied to any request made by this script.
request = request.defaults {
  pool: { maxSockets: 4 }
}
# Reference only: the query below is mongo-shell syntax (`ISODate`,
# `db.getCollection`) and must not run as part of this script, where `db`
# has not been assigned yet at this point.
# As written it lists `pcc` documents published after 2018-01-01 whose
# `unit_id` has no matching `_id` in the `unit` collection, grouped by unit id.
#
# db.getCollection('pcc').aggregate(
# [
# {$match: {publish: {$gt: ISODate("2018-01-01")}}},
# {$project: {unit_id: 1, unit: 1}},
# {$lookup: {
# as: 'u',
# from: 'unit',
# localField: "unit_id",
# foreignField: "_id"
# }},
# {$project: {_id: "$unit_id", unit: 1, u: {$size: "$u"}}},
# {$match: {u: 0, unit: {$ne: null}}},
# {$group: {_id: "$_id", name: {$max: "$unit"}}},
# ]).toArray()
client = mongodb.MongoClient
# Shared database handle; stays null until the connection below succeeds.
# `connectDB` polls this variable.
db = null
# NOTE(review): the credential/host part of this URI reads `[email protected]`,
# which looks like a redaction placeholder rather than a real address - confirm
# the actual connection string before running.
client.connect "mongodb://node:[email protected]:10024/pcc", (err, _db) ->
  if err
    # Without a handle every `connectDB` caller would poll forever, so stop
    # here instead of reporting a false "db-ready".
    console.log err
    process.exit 1
  console.log \db-ready
  db := _db
# Passes the shared `db` handle to `cb` as soon as it is set.
# While `db` is still falsy, re-checks every 100 ms.
connectDB = (cb) ->
  return cb db if db
  retry = !-> connectDB cb
  setTimeout retry, 100
# POSTs the form fields `changeDn`, `orgId` and `orgName` to orgListCommon.jsp
# and hands the response body, loaded into cheerio, to `cb`.
# On a request error the error is logged and `cb` is not called.
postUnit = (dn, orgid, orgname, cb) ->
  payload =
    changeDn: dn
    orgId: orgid
    orgName: orgname
  request.post 'http://web.pcc.gov.tw/tps/main/pss/pblm/tender/basic/search/orgListCommon.jsp', {form: payload}, (err, res) ->
    unless err
      return cb cheerio.load(res.body)
    console.log err
    return
# Reads the last table under `#page` of a cheerio-loaded page and upserts one
# document per data row into the `unit` collection.
#   name     - label of the page being parsed; only used in the log line
#   parentId - stored as each row's `parent`, except for a row whose own id
#              equals parentId, which gets `parent: null`
#   $        - cheerio instance of the page
parseTable = (name, parentId, $) ->
  rows = []
  $('#page table').last!.find('tr').each ->
    $tds = $ 'td', this
    id = $tds.eq(0).text!
    # Strip every whitespace run (global flag) before comparing, so header
    # and blank cells are skipped even when padded on both sides.
    if id.replace(/\s+/g, '') in ['機關代碼', '']
      return
    rows.push {
      _id: id
      parent: if parentId == id then null else parentId
      name: $tds.eq(1).text!
    }
  # Nothing to write: do not wait for the database at all.
  if rows.length is 0
    return
  db <- connectDB!
  bulk = db.collection('unit').initializeUnorderedBulkOp!
  for row in rows
    bulk.find({_id: row._id}).upsert!.replaceOne row
  bulk.execute (err, res) ->
    # Report either the failure or the summary, never a summary for a
    # write that failed.
    if err
      console.log err
    else
      console.log parentId + ' ' + name + ' - ' + rows.length
# GETs `url` and passes the response body, loaded into cheerio, to `cb`.
# On a request error the error is logged (same as `postUnit`) and `cb` is
# not called, so a failed fetch no longer disappears silently.
getUnit = (url, cb)->
  err, res <- request.get url
  if err
    console.log err
    return
  cb(cheerio.load res.body)
# Crawls one page of the unit tree and recurses into every unit link on it.
#   url      - either a URL string to fetch, or an already loaded cheerio
#              instance (as produced by `postUnit`)
#   link     - label of the link that led here; when set, the page's table is
#              also stored through `parseTable`
#   parentId - id of the parent unit; omitted/null for the root page
main = (url, link, parentId) ->
  # LiveScript `==` is strict, so an omitted (undefined) parentId would not
  # match the `== null` root check below. Normalise it once.
  parentId = null unless parentId?
  db <- connectDB!
  bulk = db.collection('unit').initializeUnorderedBulkOp!
  # Number of upserts queued on `bulk`; kept locally instead of being stored
  # as an extra property on the driver's bulk object.
  queued = 0
  handler = ($) ->
    if link
      parseTable link, parentId, $
    $('u').parent('a').each ->
      $item = $ this
      label = $item.text!
      # Prefix the href with `base` minus its file name.
      href = base.replace(/[^\/]+$/, '') + $item.attr 'href'
      # Link text looks like "name(id)": drop whitespace and the
      # parenthesised part to get the name...
      name = label.replace(/\s+|\(.*\)/g, '')
      if name in ['招標公告', '決標公告']
        return
      # ...and drop everything but the parenthesised part to get the id.
      id = label.replace(/^[^(]+\(|\)|\s*/g, '')
      # On the root page every unit is stored here; on deeper pages only
      # ids without a dot are.
      if parentId == null || !/\./.test id
        bulk.find({_id: id}).upsert!.replaceOne {
          _id: id
          parent: parentId
          name: name
        }
        queued++
      # An href carrying quoted arguments is followed with a form POST
      # via `postUnit`; any other href is fetched directly.
      quoted = href.match(/'(.+)'/)
      if quoted
        args = quoted[1].split(/', *'/)
        p = if args[1] != '' then args[1] else parentId
        if args[2] != ''
          name = args[2]
        postUnit args[0], args[1], args[2], ($sub) ->
          main $sub, name, p
      else
        main href, name, id
    if queued
      bulk.execute (err, res) ->
        if err
          console.log err
        else
          console.log parentId + " insert main " + queued
  if typeof url is \string
    getUnit url, handler
  else
    handler url
# Start the crawl at the root listing; `link` and `parentId` are left unset.
main(base)
# The crawl is asynchronous, so this line prints right after it has been
# scheduled, not when it has finished.
console.log 'done'