forked from xinyu2428/HTML_TOOLS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHTML工具-资产文本清洗_v2.html
362 lines (320 loc) · 14.5 KB
/
HTML工具-资产文本清洗_v2.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>资产文本清洗</title>
</head>
<style type="text/css">
/* https://c.runoob.com/codedemo/3117/ */
body {
margin-top: 0px;
margin-bottom: 0px;
margin-left: 0px;
margin-right: 0px;
/* border: 0px;
padding: 0px; */
background-color: grey;
}
div.btn-group {
display: inline-block;
width: 15%;
height: 300px;
vertical-align: top;
/* position: relative; */
}
.btn-group button {
background-color: #4CAF50;
/* Green background */
border: 1px solid green;
/* Green border */
color: white;
/* White text */
padding: 10px 24px;
/* Some padding */
cursor: pointer;
/* Pointer/hand icon */
width: 100%;
/* Set a width if needed */
display: block;
/* Make the buttons appear below each other */
}
.btn-group button:not(:last-child) {
border-bottom: none;
/* Prevent double borders */
}
/* Add a background color on hover */
.btn-group button:hover {
background-color: #3e8e41;
}
textarea {
border: 1px solid rgb(128, 0, 107);
background-color: rgb(68, 68, 68);
color: rgb(16, 224, 86);
font-family: Arial, Helvetica, sans-serif;
font-size: 15px;
/* margin-left:0px;
padding-left: 10px; */
}
/* textarea:focus {
border: 1px solid rgb(221, 56, 84);
} */
</style>
<body>
<textarea id="history" cols="30" rows="40" placeholder="需要两个输入框时使用" style="width: 15%;"></textarea>
<textarea id="new" cols="80" rows="40" placeholder="基本输入位置" style="width: 40%;"></textarea>
<div class="btn-group">
<button id="quchong" onclick="dataCleaning()" title="示例: 放入文本,IP,子域名...">去重排序求差集</button>
<button id="quchong" onclick="mergeSubdomainAndPort()">组合域名与端口</button>
<button id="quchong" onclick="getNmapResult()" title="Nmap扫描记录信息提取,将IP与端口合并成URL">Nmap端口提取</button>
<button onclick="getIP()" title="提取文本中所有的IP">IP提取</button>
<button onclick="getURL()">子域名提取</button>
<button onclick="getURL2()">根域名提取</button>
<button onclick="getIP2()" title="示例(支持B段C段): 192.168.7.*;192.168.*.1;192.168.*.*">IP段生成</button>
</div>
<textarea id="result" cols="60" rows="40" placeholder="显示结果位置" style="width: 25%;"></textarea>
</body>
<script type="text/javascript">
var dom_textarea_history = document.getElementById("history") //历史记录资产
var dom_textarea_new = document.getElementById("new") //新资产
var dom_textarea_result = document.getElementById("result") //结果输出
function getIP2() {
var list_result = []
var list_new_text = unique(dom_textarea_new.value.split("\n"));
var reg = /\*/g
// if(list_new_text.length==1){
// var str = list_new_text[0]
// for(var i=0; i<256; i++){
// list_result.push(str.replace("*", i))
// }
// }
for (const key in list_new_text) {
var str = list_new_text[key]
if (str.match(reg) != null) {
var num = str.match(reg).length
if (num == 1) {
for (var i = 0; i < 256; i++) {
list_result.push(str.replace("*", i))
}
str = str.replace("*", 0)
console.log("第" + key + "行: (" + list_new_text[key] + ")识别成功")
} else if (num == 2) {
for (var i = 0; i < 256; i++) {
var str2 = str.replace("*", i)
for (var j = 0; j < 256; j++) {
list_result.push(str2.replace("*", j))
}
}
console.log("第" + key + "行: (" + list_new_text[key] + ")识别成功")
} else {
alert("不支持A段生成")
}
} else {
console.log("第" + key + "行: (" + list_new_text[key] + ")未识别到 * 号标记")
}
}
dom_textarea_result.value = "---<" + list_result.length + ">---\n" + list_result.join("\n")
}
/*合并子域与端口信息, 暂时用处不大*/
function mergeSubdomainAndPort() {
var list_result = []
var list_history_text = unique(dom_textarea_history.value.split("\n"));
var list_new_text = unique(dom_textarea_new.value.split("\n")); //以换行符为分隔符获取文本数组并去重
dom_textarea_history.value = list_history_text.join("\n")
dom_textarea_new.value = list_new_text.join("\n")
for (const i in list_history_text) {
var str_list = list_history_text[i].split(":")
console.log(str_list)
for (const j in list_new_text) {
var str2_list = list_new_text[j].split(":")
if (str_list[1] == str2_list[0]) {
list_result.push("http://" + str_list[0] + ":" + str2_list[1])
list_result.push("https://" + str_list[0] + ":" + str2_list[1])
console.log("http://" + str_list[0] + ":" + str2_list[1])
}
}
}
list_result = unique(list_result)
dom_textarea_result.value = "---组合地址数量<" + list_result.length + ">---\n" + list_result.join("\n")
console.log("测试456")
}
function getURL() {
// alert("感觉实际用途不大,先放着")
var list_result = []
var reg = /([a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,6}/g
//var ref = /((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/
var list_new_text = dom_textarea_new.value.split("\n"); //以换行符为分隔符获取文本数组
for (var i = 0; i < list_new_text.length; i++) {
console.log(list_new_text[i])
// console.log(reg.test(ip)) //匹配成功返回true
var list_text = list_new_text[i].match(reg)
if (list_text != null) {
console.log(list_text)
list_result.push.apply(list_result, list_text)
} else {
console.log("第" + (i + 1) + "行未提取到IP")
}
}
list_result = unique(list_result) //去空行(忽略)-->去重-->排序
dom_textarea_result.value = "---提取URL数量<" + list_result.length + ">---\n" + list_result.join("\n")
}
function getURL2() {
// alert("添加根域名提取")
var list_result = []
var reg = /([a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,6}/g
//var ref = /((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/
var list_new_text = dom_textarea_new.value.split("\n"); //以换行符为分隔符获取文本数组
for (var i = 0; i < list_new_text.length; i++) {
//console.log(list_new_text[i])
//console.log(reg.test(ip)) //匹配成功返回true
var list_text = list_new_text[i].match(reg)
if (list_text != null) {
console.log(list_text)
var domain = list_text[0]//xxx.com.cn
var root_domain
var sanji
var siji
if (domain.indexOf('com.cn') !== -1) {//若以这些字符串结尾 为"多级"根域名
sanji = domain.replace(".com.cn", "")// aaa.xxx
siji = sanji.substring(sanji.lastIndexOf(".") + 1)//xxx
root_domain = siji + ".com.cn"//xxx.com.cn
list_result.push(root_domain)
} else if (domain.indexOf('edu.cn') !== -1) {//若以这些字符串结尾 为"多级"根域名
sanji = domain.replace(".edu.cn", "")// aaa.xxx
siji = sanji.substring(sanji.lastIndexOf(".") + 1)//xxx
root_domain = siji + ".edu.cn"//xxx.com.cn
list_result.push(root_domain)
} else {
var com = domain.substring(domain.lastIndexOf(".") + 1)//com
console.log(com)
var front_com = domain.replace("." + com, "")//aaa.xxx
domain = front_com.substring(front_com.lastIndexOf(".") + 1)//xxx
root_domain = domain + "." + com//xxx.com
console.log(root_domain)
list_result.push(root_domain)
}
} else {
console.log("第" + (i + 1) + "行未提取到IP")
}
}
list_result = unique(list_result) //去空行(忽略)-->去重-->排序
dom_textarea_result.value = "---提取URL数量<" + list_result.length + ">---\n" + list_result.join("\n")
}
/*
将提供的IP地址转换为对应的C段输出
*/
function getIP() {
var list_result = []
var reg = /(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|[0-9])\.((1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.){2}(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)/g
var list_new_text = dom_textarea_new.value.split("\n"); //以换行符为分隔符获取文本数组
for (var i = 0; i < list_new_text.length; i++) {
console.log(list_new_text[i])
// console.log(reg.test(ip)) //匹配成功返回true
var list_text = list_new_text[i].match(reg)
if (list_text != null) {
console.log(list_text)
list_result.push.apply(list_result, list_text)
} else {
console.log("第" + (i + 1) + "行未提取到IP")
}
}
list_result = unique(list_result) //去空行(忽略)-->去重-->排序
dom_textarea_result.value = "---提取IP数量<" + list_result.length + ">---\n" + list_result.join("\n")
var cduan_result_list = []
for (var i = 0; i < list_result.length; i++) {
var temp = list_result[i].substr(0, list_result[i].lastIndexOf(".")) + ".0/24"
cduan_result_list.push(temp)
}
var dic = {}
for (const key in cduan_result_list) {
console.log(cduan_result_list[key])
if (dic.hasOwnProperty(cduan_result_list[key])) {
dic[cduan_result_list[key]] += 1
} else {
dic[cduan_result_list[key]] = 1
}
}
var dic2 = Object.keys(dic).sort(function (a, b) {
return dic[b] - dic[a];
});
dom_textarea_result.value += "\n\n\n---提取C段数量<" + dic2.length + ">---\n"
for (const key in dic2) {
console.log("key: " + dic2[key] + " ,value: " + dic[dic2[key]]);
dom_textarea_result.value += dic2[key] + "(" + dic[dic2[key]] + ")\n"
}
}
/*
Nmap扫描记录处理
*/
function getNmapResult() {
var list_result = []
var list_new_text = dom_textarea_new.value.split("\n"); //以换行符为分隔符获取文本数组
//var reg_ip = /\(((1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|[0-9])\.((1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.){2}(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d))\)/
var reg_ip = /((1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|[0-9])\.((1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.){2}(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d))/
var reg_port = /(\d*)\/tcp/
for (const i in list_new_text) {
var ip, port
let temp = list_new_text[i]
var list_text = temp.match(reg_ip)
if (list_text != null) {
ip = list_text[1]
// console.log(ip)
} else {
if (reg_port.test(temp)) {
var list_text = temp.match(reg_port)
port = list_text[1]
console.log(ip + ":" + port)
//未找到IP直接匹配到端口的情况忽略
if (ip) {
list_result.push("http://" + ip + ":" + port)
}
}
}
}
list_result = unique(list_result)
dom_textarea_result.value = "---合成地址数量<" + list_result.length + ">---\n" + list_result.join("\n")
}
function dataCleaning() {
// var dom_textarea_history = document.getElementById("history") //历史记录资产
// var dom_textarea_new = document.getElementById("new") //新资产
// var dom_textarea_result = document.getElementById("result") //结果输出
var list_history_text = unique(dom_textarea_history.value.split("\n"));
var list_new_text = unique(dom_textarea_new.value.split("\n")); //以换行符为分隔符获取文本数组并去重
//js 获取两个数组的交集,并集,补集,差集
//https://www.cnblogs.com/web-shu/p/13551942.html
//差集:数组arr1相对于arr2所没有的
let diff = list_new_text.filter(function (val) {
return list_history_text.indexOf(val) === -1
})
//交集
let intersection = list_history_text.filter(function (val) {
return list_new_text.indexOf(val) > -1
})
dom_textarea_history.value = list_history_text.join("\n")
dom_textarea_new.value = list_new_text.join("\n")
dom_textarea_result.value = "---差集<" + diff.length + ">---\n" + diff.join("\n")
dom_textarea_result.value += "\n\n\n---交集<" + intersection.length + ">---\n" + intersection.join("\n")
console.log("测试123")
}
//通用
//功能: 去空行-->去重-->排序
function unique(arr) {
var result = [],
hash = {};
for (var i = 0, elem; (elem = arr[i]) != null; i++) {
elem = elem.trim(); //去除收尾空字符
//判断当前元素是否为空行
if (elem != "") {
if (!hash[elem]) {
result.push(elem);
hash[elem] = true;
}
// console.log(elem)
}
}
// return result;
return result.sort(function (a, b) {
return a.localeCompare(b)
}); //排序后在输出
}
</script>
</html>