-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscraper2.py
More file actions
40 lines (32 loc) · 954 Bytes
/
Copy pathscraper2.py
File metadata and controls
40 lines (32 loc) · 954 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#1. Accessing the website through URL
import requests
from bs4 import BeautifulSoup
url = "http://quotes.toscrape.com"
# requests applies no timeout unless one is passed, so without this argument
# an unresponsive server would make the script hang indefinitely.
response = requests.get(url, timeout=10)
print(response.status_code)
# Abort on a 4xx/5xx answer instead of parsing an error page, which would
# yield no quotes and silently produce an empty spreadsheet.
response.raise_for_status()
# 2. Web scraping
# Parse the downloaded HTML with Python's built-in "html.parser" backend.
soup = BeautifulSoup(response.text, "html.parser")

# Print the text of every <span class="text"> element, one per line.
quotes = soup.find_all("span", class_="text")
for quote in quotes:
    print(quote.get_text())
# Print each quote followed by its author, as "<quote> - <author>".
# Both pieces are looked up inside the same <div class="quote"> container.
data = soup.find_all("div", class_="quote")
for quote_div in data:
    quote_text = quote_div.find("span", class_="text").text
    author_name = quote_div.find("small", class_="author").text
    print(f"{quote_text} - {author_name}")
#4. Storing data in a DataFrame
import pandas as pd
# Read (quote text, author name) from every quote container in `data`,
# then split the pairs into the two parallel lists used as columns.
pairs = [
    (
        entry.find("span", class_="text").text,
        entry.find("small", class_="author").text,
    )
    for entry in data
]
quotes_list = [quote_text for quote_text, _ in pairs]
authors_list = [author_name for _, author_name in pairs]

# One row per quote, with a "Quotes" and an "Authors" column.
df = pd.DataFrame({"Quotes": quotes_list, "Authors": authors_list})

# Notebook-style preview: a bare expression is only displayed in an
# interactive session; it prints nothing when the file runs as a script.
df
# 5. Save to Excel
# index=False keeps the DataFrame's row index out of the sheet, so only the
# data columns are written.
output_file = "quotes.xlsx"
df.to_excel(output_file, index=False)