-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
45 lines (40 loc) · 1.57 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from bs4 import BeautifulSoup
import urllib.request
import pandas as pd
def dateToURL(month, day, year):
    """Return the earnings-calendar page URL for the given date.

    Each argument is converted with ``str``. ``month`` and ``day`` receive a
    leading ``"0"`` when their text is exactly one character long; ``year``
    is inserted as is. The date is placed in the URL as year, month, day.
    """
    def two_chars(value):
        # Pad only single-character text; anything else passes through as is.
        text = str(value)
        return "0" + text if len(text) == 1 else text

    mm = two_chars(month)
    dd = two_chars(day)
    yyyy = str(year)
    template = 'https://biz.yahoo.com/research/earncal/{}{}{}.html'
    return template.format(yyyy, mm, dd)
def pullEarnings(month, day, year):
    """Scrape the earnings-calendar table for a date into a DataFrame.

    The page at ``dateToURL(month, day, year)`` is fetched and parsed with
    BeautifulSoup's ``lxml`` parser. The sixth ``<table>`` on the page is
    read; only rows with at least five ``<td>`` cells are kept, and the first
    text node of each of the first four cells is taken (presumably company,
    symbol, EPS estimate and time -- confirm against the live page layout).
    The first kept row supplies the column names, the rest supply the data.

    Args:
        month: Month of the date, int or str.
        day: Day of the date, int or str.
        year: Year of the date, int or str.

    Returns:
        pandas.DataFrame with one column per header cell and one row per
        remaining table row.

    Raises:
        Exception: ``'No data for that date'`` when the server answers with
            an HTTP error; the original ``HTTPError`` is chained as the cause.
        IndexError: when the page has fewer than six tables, or the table
            contains no usable header row.
    """
    url = dateToURL(month, day, year)

    # Only the fetch can raise HTTPError, so keep the try body that narrow.
    try:
        with urllib.request.urlopen(url) as page:
            soup = BeautifulSoup(page, 'lxml')
    except urllib.error.HTTPError as err:
        raise Exception('No data for that date') from err

    tables = soup.find_all('table')
    if len(tables) < 6:
        # Same exception type the bare subscript used to raise, now explained.
        raise IndexError(
            'Earnings table not found: expected at least 6 tables, got {}'
            .format(len(tables))
        )

    rows = []
    for row in tables[5].find_all('tr'):
        cells = row.find_all('td')
        if len(cells) >= 5:  # skip rows too short to be header/data rows
            rows.append([cell.find(text=True) for cell in cells[:4]])

    # A leading row whose text is just a newline is markup whitespace,
    # not the real header row -- drop it.
    if rows and any(text == '\n' for text in rows[0]):
        rows = rows[1:]

    if not rows:
        raise IndexError('Earnings table has no header row to build columns from')

    header, body = rows[0], rows[1:]
    # Later duplicates of a header name overwrite earlier ones, exactly as
    # the dict-literal construction did.
    return pd.DataFrame({
        name: [record[index] for record in body]
        for index, name in enumerate(header)
    })
if __name__ == "__main__":
    # Demo run: fetch the calendar for 10 February 2017 and show its first rows.
    sample = pullEarnings(2, 10, 2017)
    print(sample.head())