-
Notifications
You must be signed in to change notification settings - Fork 6
/
index.js
92 lines (65 loc) · 2.28 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
var request = require("request");
var cheerio = require("cheerio");
var http = require("http");
var fs = require("fs");
var async = require("async");
// Accumulates the scrape result: one entry per volume, each carrying its books.
const volumes = [];
// Root address of the site being scraped; book links are appended to it.
const website = "http://www.sahih-bukhari.com";
// Raise the socket limit on Node's shared HTTP agent.
http.globalAgent.maxSockets = 1000;
/**
 * Entry point of the scraper.
 *
 * Fetches the site's home page, walks the side menu to discover volumes and
 * the books listed under each one, requests every book page, and finally
 * serialises the collected `volumes` array to "sahih_bukhari.json".
 *
 * The output file is written exactly once, after every requested book page
 * has either been parsed or has failed. Writing after each response would
 * start many overlapping fs.writeFile calls on the same path.
 */
request(website, function (err, response, html) {
  // A failed home-page request has no body to parse; stop here instead of
  // handing an undefined value to cheerio.
  if (err) {
    console.log("error on url: " + website);
    return console.log(err);
  }

  var $ = cheerio.load(html);
  // Book pages requested so far.
  var requestedBooks = 0;
  // Book pages whose request has completed, successfully or not.
  var itemProcessed = 0;
  // Becomes true once the menu walk below has issued every book request.
  var menuWalked = false;

  var menu = $("tr td#menu-m").children();
  for (var i = 0; i < menu.length; i++) {
    var heading = $(menu[i]);
    // Only left-aligned <div> children are treated as volume headings.
    if (!heading.is("div[align='left']")) {
      continue;
    }

    var vol = {
      name: heading.find("strong").text(),
      books: []
    };
    volumes.push(vol);
    console.log(vol.name);

    // The <p> that directly follows a heading holds that volume's book links.
    var book_links = heading.next("p").children();
    for (var j = 0; j < book_links.length; j++) {
      var link = $(book_links[j]);
      if (!link.is("a")) {
        continue;
      }
      var book = {
        name: link.text(),
        hadiths: []
      };
      vol.books.push(book);
      saveHadiths(book, website + "/" + link.attr("href"));
    }
  }

  menuWalked = true;
  // Covers the case where every book callback already ran during the walk.
  saveWhenComplete();

  /**
   * Fetch one book page and append its hadiths to `book.hadiths`.
   *
   * @param {{name: string, hadiths: Array}} book - record to fill in place.
   * @param {string} url - address of the book page.
   */
  function saveHadiths(book, url) {
    requestedBooks++;
    console.log(url);
    request(url, function (err, res, html) {
      if (err) {
        console.log("error on url: " + url);
        console.log(err);
      } else {
        var $ = cheerio.load(html);
        var rows = $("table[width='730']").children();
        for (var k = 0; k < rows.length; k++) {
          var row = $(rows[k]);
          // A hadith starts at a node whose second following sibling holds a
          // justified cell: this node is the info line, the next one the
          // narrator line, and the one after that the text.
          if (row.next().next().find("td").attr("align") === "justify") {
            book.hadiths.push({
              info: row.find("td").text(),
              by: row.next("tr").find("td").text(),
              text: row.next("tr").next("tr").find("td").text()
            });
          }
        }
        console.log("content saved from " + url);
      }
      // Failures count as processed too, otherwise a single bad page would
      // prevent the output file from ever being written.
      itemProcessed++;
      saveWhenComplete();
    });
  }

  /**
   * Write the output file once the menu walk has finished and every
   * requested book page has been processed. Does nothing when no book was
   * requested at all.
   */
  function saveWhenComplete() {
    if (menuWalked && requestedBooks > 0 && itemProcessed === requestedBooks) {
      saveToFile();
    }
  }

  /**
   * Serialise `volumes` to "sahih_bukhari.json" and log "done" only after
   * the write has succeeded.
   */
  function saveToFile() {
    fs.writeFile("sahih_bukhari.json", JSON.stringify(volumes), function (err) {
      if (err) return console.log(err);
      console.log("done");
    });
  }
});