Sentiment analysis of Douban movie reviews in Python

Sample output first (resource link on Lanzou Cloud: https://zyjblogs.lanzous.com/iGjjfe2jyaj):

1. Word cloud

2. Pie chart of star ratings

3. Summary (positive rate, best review, worst review; see the sketch after the sample output below)

Best review: Really good! The plot uses flashbacks; if you find it confusing, just keep watching! 任嘉伦's acting is impressive: 那岚岳 and 林敬 are easy to tell apart as two different characters, and even his cute scenes aren't awkward at all, which is great! 张慧雯 is quite cute too~ The story has lots of flashbacks and lots of foreshadowing; for an original screenplay I'm personally very satisfied! Everyone has their own little schemes and secrets that the audience has to puzzle out bit by bit~ There's no absolute villain; I don't even really dislike 明尊, and maybe because the acting is so good I've become a bit of a fan of 林源! First, praise for the location shooting! It's truly beautiful, and I haven't seen a wuxia drama shot almost entirely on location in a long time!!! Even the scene where they poke a hornets' nest was shot for real; I'm so happy to see such a conscientious production!! The plot doesn't drag, isn't padded, and doesn't add filler; the characters are vivid, everyone has their own agenda yet also a pitiable side, and many of the relationships you see are really mutual exploitation, which is intense! The male and female leads have a feud; it looks formulaic, but the feud doesn't actually affect their relationship at all, and 林若寒 supports 林敬 in pursuing true love, not wanting the previous generation's feud passed on to the next; she is a very good mother.
Worst review: A garbage show, not good at all.
Positive rate: 68.4%
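
The summary is driven by SnowNLP: every review gets a sentiment score between 0 (negative) and 1 (positive), the "positive rate" is the average of those scores expressed as a percentage, and the best and worst reviews are simply the ones with the highest and lowest scores. A minimal sketch of that idea, assuming snownlp is installed (the two sample sentences are taken from the output above):

import snownlp

comments = ["很好看的!剧情不拖沓不注水", "辣鸡片子,一点也不好看"]  # sample review texts
scores = [snownlp.SnowNLP(c).sentiments for c in comments]      # each score is in [0, 1]
positive_rate = round(sum(scores) / len(scores) * 100, 1)       # average score as a percentage
best = comments[scores.index(max(scores))]                      # highest-scoring review
worst = comments[scores.index(min(scores))]                     # lowest-scoring review
print(f"好评率:{positive_rate}%", best, worst)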

II. The code

import requests
from bs4 import BeautifulSoup
import traceback
import csv
import jieba
from wordcloud import WordCloud
import numpy as np
from PIL import Image
import snownlp
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
# Request headers used for every Douban request (the Cookie value comes from a browser session)
header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    "Connection": "keep-alive",
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36",
    "Cookie": 'bid=HPwx786ji5w; douban-fav-remind=1; viewed="22601258"; gr_user_id=954bdfba-9778-4359-b238-cd539123a160; _vwo_uuid_v2=D7DD1B1011AD0B9B5B3332525CEEF25CF|9b95f719e9255f99462f09e1248197a2; __utmz=223695111.1592467854.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; ll="118254"; __utmc=30149280; __utmc=223695111; ap_v=0,6.0; __utma=30149280.349582947.1592100671.1592986456.1592989460.7; __utmz=30149280.1592989460.7.5.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1592989707%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3D_7ZcG2FOcVjzEmAtnon0r-2-zpQeowzBEOKVYuJSrfmg_SLF6-lCeZXNH6BtW6ig%26wd%3D%26eqid%3Da740e3ec00010995000000065eeb1ef0%22%5D; _pk_ses.100001.4cf6=*; __utma=223695111.1175335955.1592467854.1592986456.1592989707.4; __utmb=223695111.0.10.1592989707; __utmb=30149280.5.10.1592989460; _pk_id.100001.4cf6=263156288f9d7135.1592467854.4.1592992247.1592986526.'
}
rating = [0, 0, 0, 0, 0]  # count of reviews at each star level, from 1 to 5 stars
star_List = ['很差', '较差', '还行', '推荐', '力荐']  # Douban star labels, 1 star ("很差") through 5 stars ("力荐")

# Fetch one page of reviews and append [author, star, text] rows to commentList
def getCommentByPage(url, commentList):
    # Request the page with the headers defined above
    response = requests.get(url, headers=header)
    if response.status_code == 200:
        bs = BeautifulSoup(response.content, "html5lib")
        commentItemList = bs.select(".comment-item")
        try:
            for commentItem in commentItemList:
                comment = commentItem.select_one(".comment")
                commentInfo = comment.select_one(".comment-info")
                # Reviewer name
                author = commentInfo.select_one("a").text
                # Star rating (missing for some reviews)
                star = commentInfo.select_one(".rating")
                if star is not None:
                    starTitle = star.get('title')
                    # Tally the star level
                    for i in range(0, 5):
                        if starTitle == star_List[i]:
                            rating[i] += 1
                # Review text
                commentContent = comment.select_one(".short").text.replace("\n", "")
                if star is not None:
                    commentList.append([author, star['title'], commentContent])
        except Exception:
            # Print the traceback but keep whatever was collected so far
            traceback.print_exc()
    return commentList


def readData():
    # Read the saved CSV back and return only the review-text column
    with open(f"{name}.csv", 'r', encoding="utf-8") as file:
        csvReader = csv.reader(file)
        return [item[2] for item in csvReader]

def generateWordCloud():
    commentList = readData()
    finalComment = ""
    total = 0  # running sum of per-review sentiment scores
    count = 0  # number of reviews scored
    # Load the stop-word dictionary (one word per line)
    stop_words = [w.strip() for w in open('cn_stopwords.txt', encoding="utf-8").readlines()]
    maxScore = snownlp.SnowNLP(commentList[0]).sentiments
    maxtag = commentList[0]
    minScore = maxScore
    mintag = commentList[0]
    for comment in commentList:
        # Score each review with SnowNLP: 0 is most negative, 1 is most positive
        s = snownlp.SnowNLP(comment)
        total = total + s.sentiments
        count = count + 1
        # Track the most positive and most negative review
        if maxScore < s.sentiments:
            maxScore = s.sentiments
            maxtag = comment
        if minScore > s.sentiments:
            minScore = s.sentiments
            mintag = comment
        finalComment += comment
    # Append the summary to the report started in __main__
    with open(name + ".txt", "a", encoding="utf-8") as f:
        str1 = maxtag + "\n"
        str1 = str1 + "最差评价:" + mintag + "\n"
        str1 = str1 + "好评率:" + str(round(total / count * 100, 1)) + "%" + "\n"
        f.write(str1)
    # Tokenize with jieba and drop stop words before building the word cloud
    words = [w for w in jieba.cut(finalComment) if w.strip() and w not in stop_words]
    finalComment = " ".join(words)
    # Custom outline image for the word cloud
    image = np.array(Image.open("1.png"))
    # Generate the word cloud
    # font_path: a font file that can render Chinese
    # background_color: background colour
    # mask: image used as the outline of the word cloud
    wordCloud = WordCloud(
        font_path="YaHeiMonacoHybrid.ttf",
        background_color="white",
        mask=image
    ).generate(finalComment)
    # Save the word cloud to a local file
    wordCloud.to_file(f"{name}.png")

def generatePie():
    # Convert the per-star counts into fractions of all rated reviews
    total = sum(rating)
    if total > 0:
        fractions = [r / total for r in rating]
    else:
        fractions = rating
    font = FontProperties(fname='YaHeiMonacoHybrid.ttf', size=16)
    plt.pie(
        x=fractions,
        labels=['1', '2', '3', '4', '5'],
        colors=['red', 'pink', 'blue', 'purple', 'orange'],
        startangle=90,
        shadow=True,
        # explode=tuple(indic),  # optional: offset slices; tuple() turns a list into a tuple
        autopct='%1.1f%%'  # the digit 1, not the letter l
    )
    plt.title(u'好评分析', fontproperties=font)

    plt.savefig(name + "_饼图.jpg")
    plt.show()

if __name__ == '__main__':
    commentList = []
    movieId = input('请输入要分析 电影的id:(例子:https://movie.douban.com/subject/30425206中30425206)')
    name = ""
    # Fetch the first 10 pages of short reviews (20 per page, 200 at most)
    for i in range(10):
        baseUrl = f"https://movie.douban.com/subject/{movieId}/comments?start={i * 20}"
        if i == 0:
            # Grab the movie title from the first page; it is used for the output file names
            response = requests.get(baseUrl, headers=header)
            if response.status_code == 200:
                bs = BeautifulSoup(response.content, "html5lib")
                name = bs.title.text.strip()
        commentList = getCommentByPage(baseUrl, commentList)
    # Save all collected reviews to CSV
    with open(f"{name}.csv", 'w', newline="", encoding="utf-8") as file:
        csvWriter = csv.writer(file)
        csvWriter.writerows(commentList)
    # Start the text report; generateWordCloud() appends the summary to it
    with open(name + ".txt", "w", encoding="utf-8") as f:
        f.write(name + "\n" + "最好评价:")
    generateWordCloud()
    generatePie()
    print("分析完成")

III. Stop-word dictionary and other resources

Lanzou Cloud: https://zyjblogs.lanzous.com/iGjjfe2jyaj
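
The stop-word dictionary is expected to be a plain UTF-8 text file with one word per line, which is how generateWordCloud() reads it. A quick sanity check after downloading (assuming cn_stopwords.txt sits next to the script, as in the code above):

stop_words = [w.strip() for w in open('cn_stopwords.txt', encoding="utf-8").readlines()]
print(len(stop_words), stop_words[:5])  # number of stop words and a small sample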

Reposted from blog.csdn.net/qq_26018075/article/details/106978980