自定义词频统计函数:wordcount
# -*- encoding=utf-8 -*-
import string
from collections import Counter

import pandas as pd
# Module-level accumulators filled by wordcount(); the script entry point
# reads them to build its output table. Repeated calls keep appending.
word_list = []
freq_list = []


def wordcount(path):
    """Count word frequencies in a UTF-8 text file.

    The file is split on whitespace; each token has leading/trailing ASCII
    punctuation (``string.punctuation``) stripped and is lower-cased.
    Tokens that are empty after stripping are ignored.

    Each ``word count`` pair is printed, most frequent first (ties keep
    first-seen order), and appended to the module-level ``word_list`` and
    ``freq_list``.

    Args:
        path: Path of the text file to read.

    Returns:
        A list of ``(word, count)`` tuples ordered by descending count.
    """
    with open(path, 'r', encoding='utf-8') as text:
        tokens = (raw_word.strip(string.punctuation).lower()
                  for raw_word in text.read().split())
        # A single Counter pass replaces calling list.count() once per
        # distinct word; punctuation-only tokens strip down to '' and are
        # skipped because they are not words.
        words_count = Counter(token for token in tokens if token)

    ranked = words_count.most_common()
    for word, freq in ranked:
        print('{} {}'.format(word, freq))
        word_list.append(word)
        freq_list.append(freq)
    return ranked
if __name__ == '__main__':
    # Script entry point: count the words in the input file and save the
    # word/frequency table as CSV.
    path = 'F:\\标签库\\data\\aa.csv'
    # Run the count first: wordcount() is what fills word_list and
    # freq_list, so without this call the table below would be empty.
    wordcount(path)
    result = pd.DataFrame({"word": word_list, "freq": freq_list})
    result.to_csv('F:\\标签库\\data\\bb.csv', index=False)
E:\laidefa\python.exe "E:/Program Files/pycharmproject/文本关键词提取/词频统计.py"
vs 2960
情况 1560
联赛 1473
亚盘 1337
分析 1239
优势 1014
主胜 925
后市 890
支持 846