【文本处理 词频统计】Python 实现词频统计

自定义词频统计函数:wordcount

# -*- encoding=utf-8 -*-

import string
from collections import Counter

import pandas as pd

# Module-level accumulators filled by wordcount(): word_list[i] is a token and
# freq_list[i] is the number of times that token occurred.
word_list=[]
freq_list=[]


def wordcount(path):
    """Count word frequencies in a UTF-8 text file.

    The file content is split on whitespace. Each token has leading and
    trailing ASCII punctuation (``string.punctuation``) stripped and is
    lower-cased. Tokens that are empty after stripping (for example a
    stand-alone ``--``) are ignored rather than counted as a word.

    Every ``word count`` pair is printed, most frequent first, and appended
    to the module-level ``word_list`` / ``freq_list``. Repeated calls keep
    appending to those lists.

    Args:
        path: Path of the text file to read.

    Returns:
        A list of ``(word, count)`` tuples ordered by descending count;
        words with equal counts keep their order of first appearance.
    """
    with open(path, 'r', encoding='utf-8') as text:
        tokens = (raw_word.strip(string.punctuation).lower()
                  for raw_word in text.read().split())
        # Counter tallies in a single pass instead of calling list.count()
        # once per distinct word.
        words_count = Counter(token for token in tokens if token)
    ranked = words_count.most_common()
    for word, freq in ranked:
        print('{} {}'.format(word, freq))
        word_list.append(word)
        freq_list.append(freq)
    return ranked



if __name__ == '__main__':
    # Text file whose words are to be counted.
    path = 'F:\\标签库\\data\\aa.csv'
    # Run the count so word_list / freq_list are populated; without this call
    # the DataFrame below is empty and only the CSV header is written.
    wordcount(path)
    # One row per distinct word, in the order wordcount() produced them.
    result=pd.DataFrame({"word":word_list,"freq":freq_list})
    result.to_csv('F:\\标签库\\data\\bb.csv',index=False)
运行结果:

E:\laidefa\python.exe "E:/Program Files/pycharmproject/文本关键词提取/词频统计.py"
vs 2960
情况 1560
联赛 1473
亚盘 1337
分析 1239
优势 1014
主胜 925
后市 890
支持 846

猜你喜欢

转载自blog.csdn.net/u013421629/article/details/81028154