Python学习-gevent模块使用实例

时光网电视剧TOP100多协程爬虫demo

以下是4月13号的作业,记录一下,放在这里方便以后翻阅!也不知道写得对不对,反正能运行。

from gevent import monkey
monkey.patch_all()
import requests,openpyxl,time,gevent
from gevent.queue import Queue
# Wall-clock start time; the script prints the elapsed seconds at the very end.
start = time.time()

# Shared state: the queue of page URLs still to fetch and the spawned greenlets.
wk = Queue()
work_list = []

# HTTP headers (Cookie and User-Agent) sent with every request.
headers = {
    'Cookie': '_tt_=7F658070FCFA18C5BCFDEC18E58ECF7E; Hm_lvt_07aa95427da600fc217b1133c1e84e5b=1618303016; __utma=196937584.1729426125.1618303016.1618303016.1618303016.1; __utmc=196937584; __utmz=196937584.1618303016.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmt=1; __utmt_~1=1; Hm_lpvt_07aa95427da600fc217b1133c1e84e5b=1618303021; __utmb=196937584.4.10.1618303016',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
}

# Output workbook: one titled sheet whose first row holds the column captions.
xlsx = openpyxl.Workbook()
sheet = xlsx.active
sheet.title = '时光网电视剧TOP100'
for cell_ref, caption in {
    'A1': '电视剧名',
    'B1': '导演',
    'C1': '演员',
    'D1': '评语',
}.items():
    sheet[cell_ref] = caption



def pachong(timeout=10):
    """Worker: drain the URL queue and append one sheet row per listed item.

    Repeatedly takes a URL from the module-level queue ``wk``, fetches it with
    the shared ``headers``, reads ``data.items`` from the JSON response and
    appends ``[title, director, actors, description]`` for every item to the
    module-level ``sheet``.

    Args:
        timeout: Seconds to wait for the server before giving up on a request.
            Without a timeout a stalled connection would block this worker,
            and therefore ``gevent.joinall``, indefinitely.

    A page that fails with a network error, a non-2xx HTTP status or an
    undecodable body is reported on stdout and skipped, so the worker keeps
    processing the remaining URLs. An unexpected JSON layout still raises
    (``KeyError``/``TypeError``), as it did before.
    """
    while not wk.empty():
        url = wk.get_nowait()
        try:
            res = requests.get(url, headers=headers, timeout=timeout)
            # Surface 4xx/5xx responses instead of trying to parse an error page.
            res.raise_for_status()
            payload = res.json()
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers JSON decode failures on older requests versions.
            print('skipping {}: {!r}'.format(url, exc))
            continue

        # Build every row for the page first so a malformed item cannot leave
        # the sheet with only part of a page written.
        rows = [
            [
                item['title'],
                item['movieInfo']['director'],
                item['movieInfo']['actors'],
                item['description'],
            ]
            for item in payload['data']['items']
        ]
        for row in rows:
            sheet.append(row)


# Queue the ten result pages (pageIndex 1..10, pageSize 10) and spawn one
# pachong worker greenlet per queued page.
for page in range(1, 11):
    wk.put_nowait(
        'http://front-gateway.mtime.com/community/top_list/detail.api'
        '?tt=1618303254691&id=300765&pageIndex={}&pageSize=10'.format(page)
    )
    work_list.append(gevent.spawn(pachong))

# Wait for every worker to finish, then write the workbook to disk.
gevent.joinall(work_list)
xlsx.save('时光网电视剧TOP100.xlsx')

# Print the total elapsed seconds since `start`.
print(time.time() - start)
温馨提示:本文最后更新于2021-10-16 22:04:21
某些文章具有时效性,若有错误或已失效,请在下方留言或联系PING科技-站长
© 版权声明
THE END
喜欢就支持一下吧
点赞0
分享
评论 抢沙发
Ping科技的头像-Ping科技/iOS黑科技资源网

昵称

取消
昵称表情代码图片