-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathurl-finder.js
104 lines (89 loc) · 2.61 KB
/
url-finder.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
const readline = require('readline');
const axios = require('axios');
const cheerio = require('cheerio');
const fs = require('fs');
const { URL } = require('url');
// Line-based interface over the terminal: reads from stdin and echoes to
// stdout. Used further down to prompt for the start URL; it must be closed
// explicitly, otherwise the open stdin handle keeps the process alive.
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout,
});
/**
 * Crawl a site starting at `url` and collect its internal "directory" links.
 *
 * A link counts as a directory when it resolves to a URL on the same hostname
 * as the page it was found on and ends with `/`. Every directory found is
 * fetched and scanned in the same way; each URL is recorded at most once.
 *
 * @param {string} url - Absolute URL of the page to start crawling from.
 * @returns {Promise<string[]>} The directory URLs, in the order they were
 *   discovered. The promise settles only after every recursive fetch has
 *   finished. Pages that fail to load are logged and skipped; an unexpected
 *   failure of the crawl itself is logged and yields `[]`.
 */
function crawl(url) {
  const visitedUrls = new Set();
  const directories = [];

  /**
   * Fetch one page, record its unseen internal directory links, and recurse
   * into each of them. Never rejects: failures are logged and swallowed so one
   * broken page does not abort the rest of the crawl.
   */
  async function visit(pageUrl) {
    try {
      const response = await axios.get(pageUrl);
      const $ = cheerio.load(response.data);

      // Recursive visits are started while scanning (so sibling directories
      // are fetched concurrently) but are collected here and awaited below.
      // Without that, crawl() would resolve after the first page only and the
      // caller would see an incomplete list.
      const pendingVisits = [];

      $('a').each((index, element) => {
        const href = $(element).attr('href');
        if (!href) {
          return;
        }

        const absoluteUrl = resolveAbsoluteUrl(pageUrl, href);
        if (absoluteUrl === null) {
          // Already reported by resolveAbsoluteUrl; nothing to classify.
          return;
        }

        if (!isInternalUrl(pageUrl, absoluteUrl) || visitedUrls.has(absoluteUrl)) {
          return;
        }

        visitedUrls.add(absoluteUrl);
        if (isDirectory(absoluteUrl)) {
          directories.push(absoluteUrl);
          pendingVisits.push(visit(absoluteUrl));
        }
      });

      await Promise.all(pendingVisits);
    } catch (error) {
      console.error(`Failed to visit URL: ${pageUrl}`);
    }
  }

  /** Resolve `relativeUrl` against `baseUrl`; returns `null` if it cannot be parsed. */
  function resolveAbsoluteUrl(baseUrl, relativeUrl) {
    try {
      return new URL(relativeUrl, baseUrl).href;
    } catch (error) {
      console.error(`Failed to resolve absolute URL for: ${relativeUrl}`);
      return null;
    }
  }

  /** True when both URLs parse and share exactly the same hostname. */
  function isInternalUrl(baseUrl, candidateUrl) {
    try {
      const baseHostname = new URL(baseUrl).hostname;
      const candidateHostname = new URL(candidateUrl).hostname;
      return baseHostname === candidateHostname;
    } catch (error) {
      console.error(`Failed to determine if URL is internal: ${candidateUrl}`);
      return false;
    }
  }

  /** A URL is treated as a directory when it ends with a slash. */
  function isDirectory(candidateUrl) {
    return candidateUrl.endsWith('/');
  }

  return visit(url)
    .then(() => directories)
    .catch((error) => {
      console.error(error);
      return [];
    });
}
// Startup banner: \x1b[33m switches the terminal to yellow, \x1b[0m resets it.
console.log('\x1b[33m%s\x1b[0m', 'Author: @Securi3yTalent');
console.log('\x1b[33m%s\x1b[0m', 'join_us: https://t.me/Securi3yTalent');
console.log(`
example: node script.js then enter URL (https://example.com)
`);

// Prompt for the start URL, crawl it, and write every discovered directory URL
// to directories.txt (one per line, each terminated by a newline).
rl.question('Enter the website URL: ', async (url) => {
  const outputFilePath = 'directories.txt';

  try {
    // Trim so a stray space or newline from the terminal does not end up in
    // the request URL.
    const directories = await crawl(url.trim());
    const contents = directories.map((directory) => `${directory}\n`).join('');

    // Awaiting the write means the success message is printed only once the
    // file is actually on disk, and a failed write (bad permissions, full
    // disk, ...) lands in the catch below instead of crashing the process.
    await fs.promises.writeFile(outputFilePath, contents);
    console.log(`Directories saved to ${outputFilePath}`);
  } catch (error) {
    console.error(error);
    process.exitCode = 1;
  } finally {
    // Always release stdin so the process can exit, whatever happened above.
    rl.close();
  }
});