### Walk every page in url_list and collect each guest's name and profession text.
# `url_list`, `headers`, `name` and `types` are defined earlier in the script.
for url in url_list:
    print(url)
    # A timeout keeps one unresponsive page from hanging the whole scrape.
    res = requests.get(url, headers=headers, timeout=30)
    res.encoding = 'utf-8'
    text = res.text
    # Keep only the HTML between the "参与期数" and "表演嘉宾" markers.
    sp1 = text.split("参与期数")[1].split("表演嘉宾")[0]
    selector = etree.HTML(sp1)
    # The first <tr> is dropped (presumably the table header row — confirm).
    tr_list = selector.xpath('.//tr')[1:]
    for row in tr_list:
        name_hits = row.xpath(".//div[@class='para']/b/a/text()")
        type_hits = row.xpath(".//div[@class='para']/text()")
        # Skip rows missing either field so that `name` and `types` stay
        # index-aligned; the export step pairs them by position.
        if not name_hits or not type_hits:
            continue
        ### Guest name
        name.append(name_hits[0])
        ### Guest profession text: first comma-separated item, "(" removed.
        # NOTE(review): these are ASCII "," and "("; if the page uses
        # full-width punctuation they will not match — confirm.
        types.append(type_hits[0].split(",")[0].replace("(", ""))
print(name)
print(types)
print(len(name))
print(len(types))
保存数据(写入 Excel 文件)
# Write the collected guest names and profession text to a workbook:
# a header row first, then one row per guest.
outwb = openpyxl.Workbook()
outws = outwb.create_sheet(index=0)
outws.cell(row=1, column=1, value="名字")
outws.cell(row=1, column=2, value="明星类型")
# Data rows start at row 2, directly below the header.
for row_no, (guest, guest_type) in enumerate(zip(name, types), start=2):
    outws.cell(row=row_no, column=1, value=str(guest))
    outws.cell(row=row_no, column=2, value=str(guest_type))
# Save through `outwb`, the workbook created above (the original called
# `.save` on an undefined name `outwb_p`).
# NOTE(review): openpyxl writes the xlsx format regardless of the ".xls"
# extension; the file name is kept because later code reads this same path.
outwb.save("奔跑吧嘉宾名单-李运辰.xls")  # save
# Reload the saved sheet and pull both columns back out as plain lists.
excel_path = "奔跑吧嘉宾名单-李运辰.xls"
data = pd.read_excel(excel_path)
name, types = (data[column].tolist() for column in ('名字', '明星类型'))
然后对 name(嘉宾名字)统计出现次数(即参加过多少次),并按次数排名(取前15)。
# Rank guests by how many times their name appears in `name`.
from collections import Counter

# Tally appearances per guest. The original read `result` without ever
# assigning it, although Counter was imported for this purpose.
result = Counter(name)
# most_common() returns (guest, count) pairs sorted by count, highest first;
# guests with equal counts keep first-seen order.
d = result.most_common()
# Slicing (rather than indexing 0..15) avoids an IndexError when there are
# fewer than 16 distinct guests.
# NOTE(review): keeps 16 entries, as the original indices 0..15 did; confirm
# whether a top 15 was intended.
top_entries = d[:16]
name_key = [guest for guest, _ in top_entries]
value = [count for _, count in top_entries]
print(name_key)
print(value)
接下来进行可视化展示。
首先导入相关的库:
### 画图
from pyecharts import options as opts
from pyecharts.globals import ThemeType
from pyecharts.charts import Bar
from pyecharts.charts import Pie
### Guest profession categories
# NOTE(review): '主持人' is listed twice (indices 2 and 4), so that category is
# counted in two slots; kept as in the original — confirm the intended label.
name = ['演员','歌手','主持人','模特','主持人','运动员','舞者','制片人','赛车手','经纪人']
### One counter per category, all starting at 0
value = [0] * len(name)
for entry in types:
    # Values reloaded from the spreadsheet may be non-strings (e.g. NaN for an
    # empty cell); a substring test on those would raise TypeError.
    if not isinstance(entry, str):
        continue
    # An entry counts toward every category whose label it contains.
    for j, label in enumerate(name):
        if label in entry:
            value[j] += 1
print(name)
print(value)