forked from ranksense/url-inspector-automator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjs_extractor.js
125 lines (60 loc) · 3.32 KB
/
js_extractor.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
() => {
var data = {};
// isHidden helper: reports whether the element's own computed `display` is 'none'.
// Background: https://stackoverflow.com/questions/19669786/check-if-element-is-visible-in-dom
// (the `el.offsetParent === null` variant discussed there is faster but is not used here).
function isHidden(el) {
  const { display } = window.getComputedStyle(el);
  return display === 'none';
}
// XPath wrapper: evaluates `path` against the whole document and returns the
// first matching node in document order, or null when nothing matches.
function getElementByXPath(path) {
  const result = document.evaluate(
    path,
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null
  );
  return result.singleNodeValue;
}
// XPath wrapper for multiple matches: evaluates `path` against the whole document
// and returns the XPathResult snapshot itself (read it with snapshotLength and
// snapshotItem(i)); matches are in document order.
function getElementsByXPath(path) {
  const snapshot = document.evaluate(
    path,
    document,
    null,
    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    null
  );
  return snapshot;
}
// Coverage: the XPath can match several sibling <div>s after the "Coverage" label,
// so only candidates that isHidden() does not reject are considered. When more than
// one candidate is visible the last one wins; when none is, data["coverage"] stays
// undefined.
// The snapshot is held in a local binding: assigning to an undeclared name would
// create a global on the inspected page and throws a ReferenceError in strict code.
const coverageCandidates = getElementsByXPath('//div[text()="Coverage"]/following-sibling::div');
let coverageNode;
for (let i = 0; i < coverageCandidates.snapshotLength; ++i) {
  const candidate = coverageCandidates.snapshotItem(i);
  if (!isHidden(candidate)) {
    coverageNode = candidate;
  }
}
data["coverage"] = coverageNode && coverageNode.textContent;

// Remaining fields: each result key maps to the XPath of the node holding its value.
const fieldXPaths = [
  ["sitemaps", '//div[text()="Sitemaps"]/parent::div/div[2]'],
  ["referring_page", '//div[text()="Referring page"]/parent::div/div[2]'],
  ["crawled_date", '//div[text()="Last crawl"]/parent::div/div[2]'],
  ["crawled_as", '//div[text()="Crawled as"]/parent::div/div[2]'],
  ["crawled_allowed", '//div[text()="Crawl allowed?"]/parent::div/div[2]'],
  ["page_fetch", '//div[text()="Page fetch"]/parent::div/div[2]'],
  ["indexing_allowed", '//div[text()="Indexing allowed?"]/parent::div/div[2]'],
  ["user_canonical", '//div[text()="User-declared canonical"]/parent::div/div[2]'],
  ["alternative_user_canonical", '//div[text()="User-declared canonical"]/parent::div/div[3]'],
  ["google_canonical", '//span[text()="Google-selected canonical"]/parent::div/parent::div/parent::div/div[2]/div'],
];
for (const [key, xpath] of fieldXPaths) {
  const node = getElementByXPath(xpath);
  // Store plain text instead of the DOM node; a lookup that matched nothing stays null.
  data[key] = node && node.textContent;
}
return data;
}