This repository has been archived by the owner on Jul 30, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
index.js
154 lines (153 loc) · 5.92 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
const puppeteer = require('puppeteer');
const fastcsv = require('fast-csv');
const fs = require('fs');
// Matches every run of consecutive decimal digits in a string.
const digits = /\d+/g;
// Matches digit runs that may contain one comma (e.g. "1,234"). Every part is
// optional, so this pattern can also produce empty matches.
const decimals = /\d*\,?\d*/g;
// Millisecond timestamp captured once at start-up; used to name the output
// files and stored as the last-run marker.
const now = Date.now();
// Refer to the list of makes here: https://au.yachtworld.com/core/listing/cache/dimensionValues.jsp?sm=3&searchtype=advancedsearch&Ntk=boatsUK&ftid=0&N=2279+3945&enid=0&toYear=2010&hmid=0&boatsAddedSelected=-1&slim=quick&currencyid=1008&luom=126&toLength=60&Ne=15&fromLength=40&cit=true&fromYear=1990
// Search filters that get interpolated into the results URL.
const fromLength = 40;
const toLength = 60;
const fromYear = 1990;
const toYear = 2010;
const currencyId = 1004; // euros

// Manufacturers to search for; one search is started per entry.
const makes = [
  'Bavaria', 'Amel', 'Hallberg-Rassy',
  'Beneteau', 'Oyster', 'Moody',
  'Najad', 'Westerly', 'Dehler',
  'Hanse', 'Dufour', 'Elan',
  'Catalina', 'Grand Soleil', 'Hunter',
  'Island Packet', 'Jeanneau', 'Wauquiez',
];
/**
 * Reports whether the results page currently loaded is followed by more pages.
 *
 * Reads the text of `div.searchResultsCount`, extracts the numbers it contains
 * (thousands separators are stripped) and compares the third number with the
 * second one. Presumably the text reads like "1 - 50 of 1,234" (first shown,
 * last shown, total) - TODO confirm against the live page.
 *
 * @param {object} page - Puppeteer page; only `page.$` is used.
 * @returns {Promise<boolean>} `true` when the third number is greater than the
 *   second; `false` when the counter element is missing, holds fewer than
 *   three numbers, or reading it fails (the error is logged).
 */
const hasMore = async (page) => {
  try {
    const counter = await page.$('div.searchResultsCount');
    if (!counter) {
      // No counter on the page (e.g. an empty result set): nothing more to fetch.
      return false;
    }
    const text = await counter.evaluate((node) => node.innerText);
    // A number starts with a digit and may carry any amount of thousands
    // separators, so "1,234,567" stays one value and a lone "," is ignored.
    const numbers = (text.match(/\d[\d,]*/g) || [])
      .map((group) => Number.parseInt(group.replace(/,/g, ''), 10));
    if (numbers.length < 3) {
      return false;
    }
    return numbers[2] > numbers[1];
  } catch (e) {
    console.error(e);
    return false;
  }
};
/**
 * Collects the priced listings shown on the results page currently loaded.
 *
 * Every `div.information` element is inspected. A listing is kept only when it
 * has both a `div.price` and a `div.make-model` element, its price text
 * contains digits, and a non-empty model name follows the second number of the
 * make/model text. Failures on one listing are logged and do not affect the
 * other listings.
 *
 * @param {object} page - Puppeteer page; only `page.$$` is used.
 * @param {string} make - Manufacturer name copied into every record.
 * @returns {Promise<Array<object>>} Records with the keys `make`, `model`,
 *   `length`, `year`, `price`, `area`, `salePending` and `url`.
 */
const parseListings = async (page, make) => {
  // Reads the rendered text of an element handle.
  const textOf = (handle) => handle.evaluate((el) => el.innerText);

  // Glues all digit groups of a price label into one integer.
  const toAmount = (label) => {
    try {
      return parseInt(label.match(digits).join(''));
    } catch (e) {
      // Happens frequently: listings without a price only show a "Ring" value,
      // so there are no digits to join. The amount stays undefined.
      return undefined;
    }
  };

  const collected = [];
  try {
    for (const card of await page.$$('div.information')) {
      try {
        const priceEl = await card.$('div.price');
        const titleEl = await card.$('div.make-model');
        if (!priceEl || !titleEl) {
          continue;
        }

        const amount = toAmount(await textOf(priceEl));
        const title = await textOf(titleEl);
        const link = await titleEl.$('a');
        const href = await link.evaluate((el) => el.href);
        const placeEl = await card.$('div.location');
        const place = await textOf(placeEl);

        // First number in the title is stored as the length, second as the
        // year; the model name is whatever follows the year.
        const numbers = title.match(digits);
        const modelName = title.split(numbers[1])[1].trim();

        if (amount && modelName) {
          // The location element shows "Sale Pending" instead of a place for
          // listings that are under offer.
          const pending = place === 'Sale Pending';
          collected.push({
            make,
            model: modelName,
            length: numbers[0],
            year: numbers[1],
            price: amount,
            area: pending ? '' : place,
            salePending: pending,
            url: href,
          });
        }
      } catch (e) {
        console.error(e);
      }
    }
  } catch (e) {
    console.error(e);
  }
  return collected;
};
/**
 * Ends the process with exit code 0 when the timestamp stored in the
 * `lastrundate` file (milliseconds since the epoch, as text) falls on today's
 * local calendar date.
 *
 * A missing `lastrundate` file is treated as "not run yet" so that a first
 * run does not crash; any other read error is rethrown. Unparsable content
 * never equals today's date, so the script proceeds in that case as well.
 */
const exitIfAlreadyRanToday = () => {
  let stored;
  try {
    stored = fs.readFileSync('lastrundate', 'utf8');
  } catch (e) {
    if (e.code === 'ENOENT') {
      return;
    }
    throw e;
  }
  // setHours(0, 0, 0, 0) returns the timestamp of local midnight, so two
  // moments on the same calendar day compare equal.
  const lastRunDay = new Date(Number.parseInt(stored, 10)).setHours(0, 0, 0, 0);
  const today = new Date().setHours(0, 0, 0, 0);
  if (lastRunDay === today) {
    process.exit(0);
  }
};
/**
 * Entry point.
 *
 * Skips the run when one already happened today, then opens one browser page
 * per entry of `makes`, walks through all result pages of that search and
 * gathers the listings. The combined rows are written to
 * `yachtworld-<now>.csv` and `yachtworld-<now>.json`; after the browser has
 * been closed the `lastrundate` marker is updated. The marker is only written
 * when everything before it succeeded.
 */
(async () => {
  exitIfAlreadyRanToday();
  const browser = await puppeteer.launch({
    // Debugging aids:
    // headless: false,
    // devtools: true,
    // slowMo: 50,
    timeout: 60 * 1000,
    defaultViewport: {
      width: 2048,
      height: 1280,
    },
  });

  try {
    // Resource types that are not requested, to speed up page loads.
    const blockedResourceTypes = ['stylesheet', 'font', 'image', 'script'];

    /**
     * Scrapes every result page for one manufacturer.
     * Always resolves to an array: on failure the error is logged and the
     * listings gathered so far are returned.
     */
    const getPromise = async (make) => {
      let listings = [];
      let page;
      try {
        page = await browser.newPage();
        await page.setRequestInterception(true);
        page.on('request', (req) => {
          if (blockedResourceTypes.includes(req.resourceType())) {
            req.abort();
          } else {
            req.continue();
          }
        });
        // `&currencyid=` used to be mangled into `¤cyid=` (the `&curren` HTML
        // entity), which dropped the currency filter from the query.
        const url = `https://au.yachtworld.com/core/listing/cache/searchResults.jsp?is=false&sm=3&searchtype=advancedsearch&Ntk=boatsUK&ftid=0&enid=0&toYear=${toYear}&type=%28Sail%29&hmid=0&boatsAddedSelected=-1&slim=quick&currencyid=${currencyId}&luom=126&toLength=${toLength}&cit=true&fromLength=${fromLength}&fromYear=${fromYear}&man=${encodeURIComponent(make)}&ps=50&No=0&Ns=PBoat_sortByPriceAsc|0`;
        await page.goto(url, {
          waitUntil: 'networkidle2',
        });
        listings = await parseListings(page, make);
        while (await hasMore(page)) {
          try {
            const nextLink = await page.$('#searchResultsHeader > div.searchResultsNav > span.navNext > a');
            const nextUrl = await nextLink.evaluate((node) => node.href);
            await page.goto(nextUrl, {
              waitUntil: 'networkidle2',
            });
            listings = [...listings, ...await parseListings(page, make)];
          } catch (e) {
            console.error(e);
            // The page did not advance, so hasMore() would keep answering
            // true; stop here instead of looping forever.
            break;
          }
        }
      } catch (e) {
        console.error(e);
      } finally {
        if (page) {
          try {
            await page.close();
          } catch (e) {
            console.error(e);
          }
        }
      }
      return listings;
    };

    const prices = (await Promise.all(makes.map(getPromise))).flat();
    const ws = fs.createWriteStream(`yachtworld-${now}.csv`);
    fastcsv
      .write(prices, { headers: true, quote: true, quoteColumns: true })
      .pipe(ws);
    fs.writeFileSync(`yachtworld-${now}.json`, JSON.stringify(prices));
  } finally {
    // Release the browser even when scraping or writing failed.
    await browser.close();
  }

  // writeFileSync only accepts strings and binary data, so the numeric
  // timestamp is converted explicitly.
  fs.writeFileSync('lastrundate', String(now));
})().catch((e) => {
  console.error(e);
  process.exitCode = 1;
});