# TheScrapper.py - Contact finder
import json
from argparse import ArgumentParser
import requests
from requests.exceptions import RequestException
from modules.info_reader import InfoReader
from modules.scrapper import Scrapper
banner: str = """
▄▄▄█████▓ ██░ ██ ▓█████ ██████ ▄████▄ ██▀███ ▄▄▄ ██▓███ ██▓███ ▓█████ ██▀███
▓ ██▒ ▓▒▓██░ ██▒▓█ ▀ ▒██ ▒ ▒██▀ ▀█ ▓██ ▒ ██▒▒████▄ ▓██░ ██▒▓██░ ██▒▓█ ▀ ▓██ ▒ ██▒
▒ ▓██░ ▒░▒██▀▀██░▒███ ░ ▓██▄ ▒▓█ ▄ ▓██ ░▄█ ▒▒██ ▀█▄ ▓██░ ██▓▒▓██░ ██▓▒▒███ ▓██ ░▄█ ▒
░ ▓██▓ ░ ░▓█ ░██ ▒▓█ ▄ ▒ ██▒▒▓▓▄ ▄██▒▒██▀▀█▄ ░██▄▄▄▄██ ▒██▄█▓▒ ▒▒██▄█▓▒ ▒▒▓█ ▄ ▒██▀▀█▄
▒██▒ ░ ░▓█▒░██▓░▒████▒▒██████▒▒▒ ▓███▀ ░░██▓ ▒██▒ ▓█ ▓██▒▒██▒ ░ ░▒██▒ ░ ░░▒████▒░██▓ ▒██▒
▒ ░░ ▒ ░░▒░▒░░ ▒░ ░▒ ▒▓▒ ▒ ░░ ░▒ ▒ ░░ ▒▓ ░▒▓░ ▒▒ ▓▒█░▒▓▒░ ░ ░▒▓▒░ ░ ░░░ ▒░ ░░ ▒▓ ░▒▓░
░ ▒ ░▒░ ░ ░ ░ ░░ ░▒ ░ ░ ░ ▒ ░▒ ░ ▒░ ▒ ▒▒ ░░▒ ░ ░▒ ░ ░ ░ ░ ░▒ ░ ▒░
░ ░ ░░ ░ ░ ░ ░ ░ ░ ░░ ░ ░ ▒ ░░ ░░ ░ ░░ ░
░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░
░
"""
parser = ArgumentParser(description="TheScrapper - Contact finder")
parser.add_argument("-u", "--url", required=False,
help="The URL of the target.")
parser.add_argument("-us", "--urls", required=False,
help="The URL of the target.")
parser.add_argument("-c", "--crawl", default=False, required=False, action="store_true",
help="Use every URL found on the site and hunt it down for information.")
parser.add_argument("-b", "--banner", default=False, required=False, action="store_true",
help="Use every URL found on the site and hunt it down for information.")
parser.add_argument("-s", "--sm", default=False, required=False, action="store_true",
help="Extract infos from the SocialMedia accounts.")
parser.add_argument("-o", "--output", default=False, required=False, action="store_true",
help="Save the output in a JSON file.")
parser.add_argument("-v", "--verbose", default=False, required=False, action="store_true",
help="Verbose output mode.")
args = parser.parse_args()
def verbPrint(content: str):
    """Print only when --verbose is set."""
    if args.verbose:
        print(content)
target_type = ""
if not args.url and not args.urls:
    exit("Please add --url or --urls")
elif args.url:
    target_type = "URL"
else:
    target_type = "FILE"
if not args.banner:
print(banner)
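# Single-target mode: scrape one URL (and, with -c/--crawl, every URL found on it).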
if target_type == "URL":
    if not (args.url.startswith("https://") or args.url.startswith("http://")):
        args.url = "http://" + args.url
    print("*" * 50 + "\n" + f"Target: {args.url}" + "\n" + "*" * 50 + "\n")
    # Probe the target once so an unreachable host fails early with a clear message.
    try:
        requests.get(args.url)
    except RequestException as error:
        exit(f"Could not reach {args.url}: {error}")
url: str = args.url
verbPrint("Scraping (and crawling) started")
scrap = Scrapper(url=url, crawl=args.crawl)
verbPrint("Scraping (and crawling) done\nReading and sorting information")
IR = InfoReader(content=scrap.getText())
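    # Pull e-mail addresses, phone numbers and social-media profile links out of the scraped text.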
emails: list = IR.getEmails()
numbers = IR.getPhoneNumber()
sm: list = IR.getSocials()
verbPrint("Reading and sorting information done")
print("\n")
print("E-Mails: " + "\n - ".join(emails))
print("Numbers:" + "\n - ".join(numbers))
if args.sm:
print("SocialMedia: ")
sm_info = IR.getSocialsInfo()
for x in sm_info:
url = x["url"]
info = x["info"]
if info:
print(f" - {url}:")
for y in info:
print(f" - {y}: {info[y]}")
else:
print(f" - {url}")
else:
print("SocialMedia: " + ", ".join(sm))
if args.output:
out = {
"E-Mails": emails,
"SocialMedia": sm,
"Numbers": numbers
}
        file_name = url.lower().replace(
            "http://", "").replace("https://", "").replace("/", "")
        with open(f"output/{file_name}.json", "w") as output_file:
            json.dump(out, output_file, indent=4)
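# Multi-target mode: read one URL per line from the file given with -us/--urls.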
elif target_type == "FILE":
out = []
    for url in open(args.urls, "r").readlines():
        url = url.strip()
        print("\n\n")
        if not (url.startswith("https://") or url.startswith("http://")):
            url = "https://" + url
        print("*" * 50 + "\n" + f"Target: {url}" + "\n" + "*" * 50 + "\n")
        # Skip targets that cannot be reached instead of aborting the whole run.
        try:
            requests.get(url)
        except RequestException as error:
            print(f"Could not reach {url}: {error}")
            continue
verbPrint("Scraping (and crawling) started")
scrap = Scrapper(url=url, crawl=args.crawl)
verbPrint("Scraping (and crawling) done\nReading and sorting information")
IR = InfoReader(content=scrap.getText())
emails: list = IR.getEmails()
numbers = IR.getPhoneNumber()
sm: list = IR.getSocials()
out.append({
"Target": url,
"E-Mails": emails,
"SocialMedia": sm,
"Numbers": numbers
})
verbPrint("Reading and sorting information done")
print("E-Mails:\n" + "\n - ".join(emails))
print("Numbers:\n" + "\n - ".join(numbers))
if args.sm:
print("SocialMedia: ")
sm_info = IR.getSocialsInfo()
for x in sm_info:
url = x["url"]
info = x["info"]
if info:
print(f" - {url}:")
for y in info:
print(f" - {y}: {info[y]}")
else:
print(f" - {url}")
else:
print("SocialMedia: " + ", ".join(sm))
    if args.output:
        file_name = args.urls.replace("/", "_")
        with open(f"output/{file_name}.json", "w") as output_file:
            json.dump(out, output_file, indent=4)