This repository has been archived by the owner on Dec 4, 2017. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrapeasy.js
executable file
·82 lines (82 loc) · 3.63 KB
/
scrapeasy.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
var request = require("request");
var toSource = require("tosource");
var parsonic = require("parsonic");
var scraper = require("./scraper");
// Socket limit handed to `request` in the options built by the exported function.
var maxSockets = 10;

// Maps each own property name of the ./scraper module to the source text that
// `toSource` produces for that property's value. The exported function passes
// this map to parsonic.load as `scraper`.
var stringifiedScraper = {};
// forEach, not filter: this loop exists only for its side effect, so there is
// no reason to build (and discard) a filtered array.
Object.getOwnPropertyNames(scraper).forEach(function(name) {
    stringifiedScraper[name] = toSource(scraper[name]);
});
module.exports = function(url, pattern, callback) {
var results = {};
var options = {
'pool.maxSockets': maxSockets,
url: url,
headers: {
'User-Agent': 'scrapeasy'
}
};
request(options, function(err, res, data) {
if (err) {
callback(err);
} else {
try {
if (res.statusCode !== 200) {
console.log("Status:", res.statusCode);
}
parsonic.load(data, {
pattern: pattern,
scraper: stringifiedScraper
}, function(document, args) {
var pattern = args.pattern;
var elements = {};
var results = {};
var toEval = "";
Object.getOwnPropertyNames(args.scraper).filter(function(f) {
toEval += args.scraper[f];
});
eval(toEval);
var selectors = Object.getOwnPropertyNames(pattern);
var asProperties = {};
elements["*"] = document.querySelectorAll("*");
for (var i = 0; i < selectors.length; i++) {
elements[selectors[i]] = document.querySelectorAll(selectors[i]);
pattern[selectors[i]].filter(function(rule) {
var property = rule.as.split("[n]");
if (!property[1].length) {
results[property[0]] = getValuesAsElements(rule, elements[selectors[i]]);
} else {
if (typeof asProperties[property[0]] === "undefined") {
asProperties[property[0]] = {};
if (typeof results[property[0]] === "undefined") {
results[property[0]] = [];
}
}
if (typeof asProperties[property[0]][selectors[i]] === "undefined") {
asProperties[property[0]][selectors[i]] = [];
}
asProperties[property[0]][selectors[i]].push({
property: property[1],
rule: rule
});
}
});
}
Object.getOwnPropertyNames(asProperties).filter(function(name) {
results[name] = results[name].concat(getValuesAsProperties(asProperties[name], elements));
});
return results;
},
function(result) {
if (typeof result.error !== "undefined") {
callback(result.error);
} else {
callback(false, result);
}
});
} catch (err) {
callback(err, results);
}
}
});
};