- 编程
豆瓣top250-爬虫实例1
- 2023-12-23 19:22:44 @
# -*- coding:utf-8 -*-
import urllib.request
import re
from bs4 import BeautifulSoup
# 爬虫函数
def crawl(url, headers):
page = urllib.request.Request(url, headers=headers)
page = urllib.request.urlopen(page)
contents = page.read()
#print(contents)
soup = BeautifulSoup(contents, "html.parser")
print('豆瓣电影250: 序号 \t影片名\t 评分 \t评价人数')
for tag in soup.find_all(attrs={"class":"item"}):
content = tag.get_text()
content = content.replace('\n','') #删除多余换行
print(content, '\n')
# 主函数
if __name__ == '__main__':
url = 'http://movie.douban.com/top250?format=text'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) \
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}
crawl(url, headers)
0 条评论
目前还没有评论...