-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscrape.py
87 lines (84 loc) · 2.12 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from urllib2 import urlopen as uReq
from bs4 import BeautifulSoup as soup
import urllib
# Scrape the scholarships.com "academic major" directory.
#
# The crawl is three levels deep: the directory page lists majors, each major
# page lists scholarships in table cells, and each scholarship page carries a
# details list whose <li> items alternate between section labels and values.
# For every scholarship the seven known sections are printed, one per line.
my_url = 'https://www.scholarships.com/financial-aid/college-scholarships/scholarship-directory/academic-major'
bas = "https://www.scholarships.com"

# Section labels recognised in the details list, in the order they are printed.
_FIELD_LABELS = (
    'Website Address',
    'Contact',
    'Address',
    'Application Deadline',
    'Number Of Awards',
    'Maximum Amount',
    'Scholarship Description',
)


def _fetch_soup(url, opener=uReq):
    """Download *url* with *opener* and return the parsed document.

    The response handle is closed even when reading fails, so a long crawl
    does not accumulate open connections.
    """
    handle = opener(url)
    try:
        html = handle.read()
    finally:
        handle.close()
    return soup(html, "html.parser")


def _hrefs(tags):
    """Yield the href of the first <a> inside each tag.

    Tags that contain no anchor, or whose anchor has no href, are skipped
    instead of aborting the whole crawl.
    """
    for tag in tags:
        anchor = tag.a
        if anchor is not None and anchor.get("href"):
            yield anchor["href"]


def _collect_fields(texts):
    """Group detail texts under the section label that precedes them.

    Returns a dict with one entry per label in _FIELD_LABELS. Each value is
    the label followed by the concatenated texts up to the next label, or an
    empty string when the section is absent from the page. A label item only
    starts its own section; it is never appended to the previous one.
    """
    fields = dict.fromkeys(_FIELD_LABELS, "")
    current = None
    for text in texts:
        if text in fields:
            current = text
            fields[current] = text
        elif current is not None:
            fields[current] += text
    return fields


page_soup = _fetch_soup(my_url)
containers = page_soup.findAll("ul", {"id": "ullist"})
# An IndexError here means the directory page has no <ul id="ullist">.
listpoints = containers[0].findAll("li")
for links in _hrefs(listpoints):
    page1_soup = _fetch_soup(bas + links)
    for lks in _hrefs(page1_soup.findAll("td")):
        # Detail pages are fetched through urllib.urlopen with a UTF-8
        # encoded URL, exactly as before; only the handle is now closed.
        finlks = bas + lks
        page2_soup = _fetch_soup(finlks.encode('utf-8'), opener=urllib.urlopen)
        details = page2_soup.find("div", {"id": "divscholdetails"})
        if details is None or details.ul is None:
            # Not a scholarship detail page; nothing to report.
            continue
        fields = _collect_fields(item.text for item in details.ul.findAll("li"))
        for label in _FIELD_LABELS:
            print(fields[label])