-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathScript.py
41 lines (33 loc) · 1.02 KB
/
Script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape every quote from the first page of quotes.toscrape.com and save
# them to quotes_data.csv, one row per quote: text, author, and a
# comma-joined tag string.

# Define the URL
url = 'http://quotes.toscrape.com/'

# Always pass a timeout: without one, requests.get can hang indefinitely
# on an unresponsive server.
response = requests.get(url, timeout=10)
if response.status_code == 200:
    print("Page retrieved successfully")
else:
    print("Error retrieving the page")
    # Bug fix: the original only printed the error and then parsed the
    # failed response anyway. Abort so we never scrape/save garbage.
    raise SystemExit(1)

soup = BeautifulSoup(response.text, 'html.parser')

# Empty lists to store the scraped data (one entry per quote container).
quotes = []
authors = []
tags = []

# Each quote on the page lives in a <div class="quote"> container.
for container in soup.find_all('div', class_='quote'):
    quotes.append(container.find('span', class_='text').text)
    authors.append(container.find('small', class_='author').text)
    # A quote may carry several tags; flatten them into one
    # comma-separated string so the CSV stays one row per quote.
    tag_elements = container.find_all('a', class_='tag')
    tags.append(', '.join(tag.text for tag in tag_elements))

quotes_df = pd.DataFrame({
    'Quote': quotes,
    'Author': authors,
    'Tags': tags,
})
print(quotes_df)

quotes_df.to_csv('quotes_data.csv', index=False)
print("Data saved to quotes_data.csv")