Python: word segmentation and word-frequency counting


# First gather the source text into one txt file, then segment it and write the words to another txt file
import matplotlib
import matplotlib.pyplot as plt  # data visualization
import jieba  # word segmentation
from wordcloud import WordCloud, ImageColorGenerator, STOPWORDS  # word cloud, color generator, stopwords
import numpy as np  # numerical computing
from PIL import Image  # image handling
# (only jieba is needed in this step; the other imports are for the word-cloud sketch further down)

with open('D:/SAB/Desktop/res.txt', encoding='utf-8') as f:
    textfile = f.read()  # read the raw text
wordlist = jieba.cut_for_search(textfile)  # segment the text (search-engine mode)
space_list = ' '.join(wordlist)  # join the words with spaces
with open("D:/SAB/Desktop/word4.txt", "w", encoding='utf-8') as f:
    f.write(space_list)
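
For reference, jieba offers several segmentation modes: cut_for_search (used above) additionally emits sub-words of longer tokens, which suits indexing and search. A minimal sketch comparing the modes; the sample sentence is just an illustrative assumption:

import jieba

sentence = "自然语言处理很有趣"  # hypothetical sample sentence
print("/".join(jieba.cut(sentence)))                # precise mode (default)
print("/".join(jieba.cut(sentence, cut_all=True)))  # full mode: every possible word
print("/".join(jieba.cut_for_search(sentence)))     # search-engine mode, as used above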

# Count the frequencies of the words segmented above
import jieba

with open("D:/SAB/Desktop/word4.txt", "r", encoding='utf-8') as f:  # the file written in the first step
    txt = f.read()
words = jieba.lcut(txt)
counts = {}
for word in words:
    if len(word) == 1:  # skip single characters (punctuation, particles, the spaces we inserted)
        continue
    counts[word] = counts.get(word, 0) + 1
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)
for i in range(15):  # print the 15 most frequent words
    word, count = items[i]
    print("{0:<10}{1:>5}".format(word, count))

Result: the 15 most frequent words and their counts, printed one per line.
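
The counting loop above can also be written with collections.Counter from the standard library; a sketch of the equivalent, assuming the same word4.txt input:

from collections import Counter

import jieba

with open("D:/SAB/Desktop/word4.txt", "r", encoding='utf-8') as f:
    txt = f.read()

words = [w for w in jieba.lcut(txt) if len(w) > 1]  # drop single characters, as above
for word, count in Counter(words).most_common(15):  # top 15 by frequency
    print("{0:<10}{1:>5}".format(word, count))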

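The wordcloud and matplotlib imports at the top are never used in the snippets above; presumably they were meant for visualizing the counts. A minimal sketch of drawing a word cloud from the segmented file — the font path is an assumption, since Chinese text needs a CJK-capable font:

import matplotlib.pyplot as plt
from wordcloud import WordCloud

with open("D:/SAB/Desktop/word4.txt", "r", encoding='utf-8') as f:
    space_list = f.read()  # space-separated words from the first step

wc = WordCloud(
    font_path="C:/Windows/Fonts/simhei.ttf",  # assumed font path; adjust to your system
    width=800,
    height=600,
    background_color="white",
).generate(space_list)

plt.imshow(wc, interpolation="bilinear")
plt.axis("off")
plt.show()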

Reposted from www.cnblogs.com/yusuf/p/13393777.html