Python:使用结巴分词(jieba)进行分词并去除停用词

#coding:gbk
import jieba
# Segment a Chinese sentence with jieba and strip stop words from the result.
#
# Text is kept as unicode from start to finish so the same code runs on both
# Python 2 and Python 3 (the previous version encoded every token to GBK
# bytes, which on Python 3 never match the stop words and cannot be
# concatenated onto a text string).
#
# To load the stop words from a file instead (one word per line; needs
# `import io`; the previous byte-wise comparison against GBK-encoded tokens
# implies the file is GBK-encoded -- adjust `encoding` if yours differs):
#   with io.open('stopword.txt', encoding='gbk') as f:
#       stopwords = frozenset(line.rstrip() for line in f)
stopwords = frozenset([u'的', u'附近'])

# cut_all=False selects jieba's accurate (non-exhaustive) segmentation mode.
segs = jieba.cut(u'北京附近的租房', cut_all=False)

# Keep every token that is not a stop word and join the survivors in one pass.
final = u''.join(seg for seg in segs if seg not in stopwords)

# A single parenthesised argument is valid for print on Python 2 and 3 alike.
# Output is encoded by the console's own encoding rather than hard-coded GBK.
print(final)

猜你喜欢

转载自blog.csdn.net/a1b2c3d4123456/article/details/52943536