-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathgoodreads_scrape.py
54 lines (47 loc) · 1.59 KB
/
goodreads_scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import requests
import bs4
import re
def get_original_publication_year(book_id, timeout=10):
    """Return the original publication year shown on a Goodreads book page.

    Downloads ``https://www.goodreads.com/book/show/<book_id>``, takes the
    first ``.greyText`` element inside ``#details`` and returns the first run
    of digits in that element's markup that is longer than two characters
    (runs of one or two digits are skipped).

    Args:
        book_id: Goodreads book identifier. It is converted with ``str``
            before being appended to the URL, so ints are accepted as well
            as strings.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Returns:
        The matched digits as a string, or ``None`` when the page has no
        ``#details .greyText`` element or that element contains no digit run
        longer than two characters.
    """
    response = requests.get(
        'https://www.goodreads.com/book/show/' + str(book_id),
        timeout=timeout,
    )
    # Name the parser explicitly so the parse does not depend on which
    # optional parser libraries happen to be installed.
    soup = bs4.BeautifulSoup(response.text, 'html.parser')

    # Select the element directly instead of serialising the intermediate
    # result and parsing it a second time.
    grey_text = soup.select_one('#details .greyText')
    if grey_text is None:
        return None

    # The element's full markup (not only its text) is searched, matching
    # the previous behaviour of this function.
    for digits in re.findall(r'\d+', str(grey_text)):
        if len(digits) > 2:
            return digits
    return None
def get_setting(book_id, timeout=10):
    """Return the "places" entries listed on a Goodreads book page.

    Downloads ``https://www.goodreads.com/book/show/<book_id>`` and looks at
    the ``div.infoBoxRowItem`` rows inside ``#bookDataBox``. Every ``<a>``
    with an ``href`` whose markup contains the substring ``"places"``
    contributes its child nodes to the result. If those rows also contain a
    ``span.darkGreyText``, the text of the first such span is appended with
    its parentheses removed.

    Args:
        book_id: Goodreads book identifier. It is converted with ``str``
            before being appended to the URL, so ints are accepted as well
            as strings.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Returns:
        A list with the child nodes of the matching links in page order,
        optionally followed by the cleaned ``darkGreyText`` string. The list
        is empty when nothing matches.
    """
    response = requests.get(
        'https://www.goodreads.com/book/show/' + str(book_id),
        timeout=timeout,
    )
    # Name the parser explicitly so the parse does not depend on which
    # optional parser libraries happen to be installed.
    soup = bs4.BeautifulSoup(response.text, 'html.parser')

    # One CSS query replaces the earlier serialise-and-re-parse round trips.
    rows = '#bookDataBox div.infoBoxRowItem'

    place = []
    for link in soup.select(rows + ' a[href]'):
        # The test runs against the whole tag markup (href and link text),
        # matching the previous behaviour of this function.
        if "places" in str(link):
            place.extend(link.contents)

    qualifier = soup.select_one(rows + ' span.darkGreyText')
    if qualifier is not None:
        text = qualifier.get_text()
        place.append(text.replace('(', '').replace(')', ''))
    return place
# Sample Goodreads book id used for the ad-hoc run below.
x = "52357"
# get_original_publication_year(x)
get_setting(book_id=x)