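"""Scrape CoinMarketCap and collect social links for the first 100 coins.

Stage 1 pages through https://coinmarketcap.com/coins/ with requests/BeautifulSoup
to gather coin detail URLs. Stage 2 opens each coin page in headless Chrome to
read the hover-activated link panel (website, Telegram, Twitter, Discord, Reddit,
Facebook, Instagram). The results are written to coinmarketcap.csv.
"""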
import re

from requests import get
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# ------------------------------------------------------------------
# Regexes used to classify each scraped link by social network.
telegram_regex = r'(https:\/\/(t|tlgrm|telegram)\.me\/)'
twitter_regex = r'(https:\/\/twitter\.com\/(?![a-zA-Z0-9_]+\/)([a-zA-Z0-9_]+))'
discord_regex = r'(https?:\/\/)?(www\.)?(discord\.(gg|io|me|li)|discordapp\.com\/invite)\/.+[a-z]'
reddit_regex = r'^http(?:s)?://(?:www\.)?(?:[\w-]+?\.)?reddit\.com(/r/|/user/)?(?(1)([\w:]{2,21}))(/comments/)?(?(3)(\w{5,6})(?:/[\w%\\-]+)?)?(?(4)/(\w{7}))?/?(\?)?(?(6)(\S+))?(\#)?(?(8)(\S+))?$'
facebook_regex = r'(?:(?:http|https):\/\/)?(?:www\.)?facebook\.com\/(?:(?:\w)*)'
insta_regex = r'(?:(?:http|https):\/\/)?(?:www\.)?(?:instagram\.com|instagr\.am|instagr\.com)\/(\w+)'
# A coin detail link looks like "/currencies/<slug>/".
regex_coin = r'^/currencies/[a-z0-9-]+/$'
list_of_urls = []
list_of_all = []
page = 1
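# Stage 1: walk the paginated /coins/ listing and collect coin detail URLs.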
while True:
    try:
        current_page_url = f"https://coinmarketcap.com/coins/?page={page}"
        get_current_page = get(current_page_url)
        soup = BeautifulSoup(get_current_page.content, "html.parser")
        all_coins = soup.find_all("a", class_="cmc-link")
        for link in all_coins:
            # Keep only links that point at a coin detail page.
            if "/currencies/" in link["href"] and re.match(regex_coin, str(link["href"])):
                fixed_link = "https://coinmarketcap.com" + link["href"]
                list_of_urls.append(fixed_link)
                if len(list_of_urls) >= 100:
                    break
        # Stop once 100 coins have been collected; otherwise move to the next page.
        if len(list_of_urls) >= 100:
            break
        page = page + 1
    except Exception:
        break
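# Stage 2: open each coin page in headless Chrome. The social links sit behind
# a hover-activated panel, so requests/BeautifulSoup alone cannot reach them.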
index = 1
DRIVER_PATH = 'chromedriver.exe'
options = Options()
options.add_argument('--headless')
# Selenium 4 takes the chromedriver path via a Service object.
driver = webdriver.Chrome(service=Service(DRIVER_PATH), options=options)
for url in list_of_urls:
    driver.get(url)
    print(f"-----------------------------{index}/{len(list_of_urls)}")
    get_coin_link = get(url)
    coin_soup = BeautifulSoup(get_coin_link.content, "html.parser")
    # The page heading holds the coin name and, in a nested tag, its symbol.
    # Note: this CSS class is generated by CoinMarketCap and may change.
    coin_heading = coin_soup.find("h2", class_="sc-1q9q90x-0 jCInrl h1")
    coin_name = coin_heading.contents[0]
    symbol = coin_heading.contents[1].text
    links = []
    # Hover over each "link-button" with ActionChains, then read the anchors
    # that the panel reveals.
    buttons = driver.find_elements(
        By.XPATH, "//ul[@class='content']//li//button[@class='link-button']")
    actions = ActionChains(driver)
    for button in buttons:
        actions.move_to_element(button).perform()
        try:
            elements = WebDriverWait(driver, 3).until(
                EC.presence_of_all_elements_located(
                    (By.XPATH, "//ul[@class='content']//li//a")))
            for x in elements:
                links.append(x.get_attribute('href'))
        except Exception:
            pass
    # Default every field to 'empty'; overwritten below when a match is found.
    Coin_Telegram = 'empty'
    Coin_Twitter = 'empty'
    Coin_Discord = 'empty'
    Coin_Reddit = 'empty'
    Coin_Facebook = 'empty'
    Coin_Instagram = 'empty'
    for item in links:
        if re.match(telegram_regex, item):
            Coin_Telegram = item
            print('telegram found!')
        elif re.match(twitter_regex, item):
            Coin_Twitter = item
            print('twitter found!')
        elif re.match(reddit_regex, item):
            Coin_Reddit = item
            print('Reddit found!')
        elif re.match(discord_regex, item):
            Coin_Discord = item
            print('discord found!')
        elif re.match(facebook_regex, item):
            Coin_Facebook = item
            print('Facebook found!')
        elif re.match(insta_regex, item):
            Coin_Instagram = item
    index = index + 1
    try:
        # links[0] is taken as the project website (the first link in the panel).
        csv_string = f"{coin_name.strip()}({symbol}),{url.strip()},{links[0]},{Coin_Telegram.strip()},{Coin_Twitter.strip()},{Coin_Discord.strip()},{Coin_Reddit.strip()},{Coin_Facebook.strip()},{Coin_Instagram.strip()}"
    except IndexError:
        # No links were scraped for this coin, so the Website column stays 'empty'.
        csv_string = f"{coin_name.strip()}({symbol}),{url.strip()},empty,{Coin_Telegram.strip()},{Coin_Twitter.strip()},{Coin_Discord.strip()},{Coin_Reddit.strip()},{Coin_Facebook.strip()},{Coin_Instagram.strip()}"
    list_of_all.append(csv_string)
driver.quit()

with open("coinmarketcap.csv", "w") as f:
    # Header row of the CSV file.
    f.write("Name,URL,Website,Telegram,Twitter,Discord,Reddit,Facebook,Instagram")
    f.write("\n")
    for item in list_of_all:
        f.write(item)
        f.write("\n")