#coding:gbk
import jieba
# Stop words to filter out of the segmented sentence.  They are kept as
# unicode text because jieba.cut() yields unicode tokens, so membership
# tests need no per-token encoding.
# Alternative source (one stop word per line in a file):
#   stopwords = set(line.rstrip() for line in open('stopword.txt'))
# NOTE(review): words loaded that way must be decoded to unicode before
# they can match jieba's tokens -- confirm the file's encoding.
stopwords = {u'的', u'附近'}

# Segment the sample sentence; cut_all=False disables jieba's full mode.
segs = jieba.cut(u'北京附近的租房', cut_all=False)

# Keep every token that is not a stop word and join once, instead of
# encoding each token to GBK bytes and growing the result with +=.
final = u''.join(seg for seg in segs if seg not in stopwords)

# Parenthesised single-argument print works on both Python 2 and 3; the
# text is encoded only here, at the output boundary.
print(final)
# python 去除停用词 结巴分词
# 猜你喜欢
# 转载自blog.csdn.net/a1b2c3d4123456/article/details/52943536
# 今日推荐
# 周排行