-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextractThought.js
74 lines (57 loc) · 1.83 KB
/
extractThought.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
var fs = require('fs');
// Matches a 'Thought' together with its author: a <strong>...</strong>
// element, then any run of word characters, whitespace and tag punctuation
// (< > / , ;), then a '~' followed by word characters, whitespace or hyphens.
var pattern = /<strong>[\w\d;:’\s.\/<,!'"&#“\-”>]*<\/strong>[\w\s<>\/,;\n]*~[\w\s-]*/;
// Matches file names that end in a literal ".html". The dot is escaped so that
// names such as "pagehtml" or "notes.xhtml" are not mistaken for HTML files.
var htmlfile = /\.html$/;
// Synchronously list the entries of ./html/ (throws if the directory is missing).
var dir = fs.readdirSync('./html/');
// Key of the entry currently being visited by the main loop.
var file;
// Running counter used in output file names. Declared explicitly so it is a
// file-level variable rather than an implicit global (which throws in strict
// mode). It is incremented before each use, so the first saved file gets 2.
var count = 1;
/**
 * Completion handler for fs.readFile on one entry of `dir`.
 *
 * Must be invoked with `this.file` set to the key of the entry in `dir`
 * being processed (the caller binds it), so the file name can be logged.
 *
 * When the file content matches `pattern`, the matched text (thought and
 * author) is logged and written to ./txt/t<timestamp>_<count>.txt; `count`
 * is incremented before it is used in the name.
 *
 * @param {?Error} err - read error, or null/undefined on success
 * @param {Buffer|string} fd - the file's content (not a file descriptor)
 */
var callBack = function (err, fd) {
    var fileName = dir[this.file];
    console.log(fileName);

    if (err) {
        // Report which file failed and why instead of a bare "error".
        console.log("error reading " + fileName + ": " + err);
        return;
    }

    // Run the regex once and reuse the result.
    var match = pattern.exec(fd.toString());
    if (match === null) {
        console.log("didn't match file ******************************************" + fileName);
        return;
    }

    // match[0] is the relevant text (thought and the author)
    console.log(match[0]);
    count++;
    fs.writeFileSync('./txt/t' + Date.now() + '_' + count + '.txt', match[0]);
    console.log('Saved Ithink');
};
// var getThoughtandAuthor= function(text,callback) {
// };
// Start an asynchronous read for every HTML entry found in ./html/ and log
// the entries that are skipped. forEach is used instead of for...in so that
// only real array elements are visited and the index is numeric.
dir.forEach(function (name, index) {
    if (!htmlfile.test(name)) {
        console.log("********************************not a html file " + name);
        return;
    }
    // callBack reads the entry's position in `dir` from `this.file`.
    fs.readFile('./html/' + name, callBack.bind({file: index}));
});
// var files = fs.readFile('h1438350823312_1.html',function(err,fd){
// if(err)
// console.log("error");
// else
// console.log(pattern.exec(fd.toString())[0]);
// });