爬虫大作业(虎扑足球新闻)

import requests
from bs4 import BeautifulSoup
import jieba
from PIL import Image,ImageSequence
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud,ImageColorGenerator
def changeTitleToDict() -> dict:
    """Count how often each jieba token occurs in ``yingchao.txt``.

    Reads the whole file as UTF-8, segments it with ``jieba.cut``, skips the
    separator tokens listed in ``symbol``, prints the resulting mapping and
    returns it.

    Returns:
        A dict mapping each remaining token to its number of occurrences.

    Raises:
        FileNotFoundError: if ``yingchao.txt`` does not exist.
    """
    # The context manager closes the file even if reading fails.
    with open('yingchao.txt', 'r', encoding='utf-8') as f:
        text = f.read()  # not named ``str``, so the builtin stays usable

    symbol = {"/", "(", ")", " ", ";", "!", "、", ":"}
    title_dict = {}
    # One pass over the tokens instead of one list.count() scan per distinct
    # token; the resulting counts are the same.
    for token in jieba.cut(text):
        if token in symbol:
            continue
        title_dict[token] = title_dict.get(token, 0) + 1
    print(title_dict)
    return title_dict
# Crawl listing pages 1-9 and append every headline's text to yingchao.txt.
# The output file is opened once for the whole crawl (not once per headline)
# and the context manager closes it even if a request raises.
with open('yingchao.txt', 'a', encoding='utf-8') as f:
    for page in range(1, 10):
        hupu = 'https://voice.hupu.com/soccer/tag/496-%s.html' % (page)
        # Without a timeout a stalled server would block the script forever.
        reslist = requests.get(hupu, timeout=10)
        reslist.encoding = 'utf-8'
        soup_list = BeautifulSoup(reslist.text, 'html.parser')
        for news in soup_list.find_all('span', class_='n1'):
            print(news.text)
            # NOTE(review): headlines are written back to back with no
            # separator, so the last word of one title touches the first
            # word of the next in the file. Kept as-is because the reader of
            # this file tokenizes the raw text.
            f.write(news.text)

title_dict = changeTitleToDict()
# Hard-coded Windows font path; presumably chosen because SimHei covers the
# Chinese glyphs in the headlines -- adjust on other platforms.
font = r'C:\Windows\Fonts\simhei.ttf'

# Use the counts that changeTitleToDict() computed. Joining only the keys into
# a string and calling generate() made every word occur exactly once, so word
# size never reflected how often a word appeared in the headlines.
# Tokens without any letter or digit (punctuation, line breaks) are skipped so
# they are not drawn as "words".
frequencies = {
    word: count
    for word, count in title_dict.items()
    if any(ch.isalnum() for ch in word)
}

# Build the word cloud in the shape of the mask image.
# NOTE(review): per the wordcloud docs, width/height are ignored when a mask
# is given; they are kept here unchanged.
image = np.array(Image.open('1.jpg'))
cloud = WordCloud(
    background_color='white',
    font_path=font,
    mask=image,
    width=1000,
    height=860,
    margin=2,
).generate_from_frequencies(frequencies)

# Take the word colours from a second image.
image2 = np.array(Image.open('2.jpg'))
image_colors = ImageColorGenerator(image2)
cloud.recolor(color_func=image_colors)

# Save first so the file is written even if the (blocking) viewer window
# fails or is interrupted, then display the generated word cloud.
cloud.to_file('3.jpg')
plt.imshow(cloud)
plt.axis("off")
plt.show()

背景图

字体颜色图

词云图

猜你喜欢

转载自www.cnblogs.com/Lorz/p/8969234.html
今日推荐