"""Word-frequency statistics for the text stored in ``jian.txt``.

Steps (numbering follows the original exercise):
  1. read the text file;
  2. replace punctuation and line breaks with spaces;
  3. split the text into words;
  4. count how often each word occurs;
  5. sort the words by descending count;
  6. print the 20 most frequent words;
  7. append the complete ranking to ``jianCount.txt``.
"""
from collections import Counter

SOURCE_PATH = 'jian.txt'
REPORT_PATH = 'jianCount.txt'
# Characters treated as word separators in addition to ordinary whitespace.
SEPARATORS = (',', "'", '-', '\n')
TOP_N = 20


def clean_text(text, separators=SEPARATORS):
    """Return *text* with every occurrence of each separator replaced by a space."""
    for separator in separators:
        text = text.replace(separator, ' ')
    return text


def split_words(text):
    """Split *text* into a list of words.

    ``str.split()`` without an argument treats any run of whitespace as a
    single delimiter, so adjacent separators (for example ", ") do not
    produce empty strings that would then be counted as a "word".
    """
    return text.split()


def count_words(words):
    """Return ``(word, count)`` pairs sorted by descending count.

    Counting is a single pass over *words*; words with equal counts keep
    the order in which they were first seen.
    """
    return Counter(words).most_common()


def top_words(word_counts, limit=TOP_N):
    """Return at most *limit* leading entries of *word_counts*.

    Slicing instead of indexing keeps this safe when the text contains
    fewer than *limit* distinct words.
    """
    return word_counts[:limit]


def format_report(word_counts):
    """Render *word_counts* as text, one ``"<count> <word>"`` line per entry."""
    return ''.join(
        '{} {}\n'.format(count, word) for word, count in word_counts
    )


def main():
    """Run the seven steps, printing each intermediate result."""
    # 1. Read the source text.
    with open(SOURCE_PATH, mode='r', encoding='utf-8') as source_file:
        jian_text = source_file.read()
    print(jian_text)

    # 2. Replace punctuation and line breaks with spaces.
    jian_text = clean_text(jian_text)
    print(jian_text)

    # 3. Split into words.
    jian_list = split_words(jian_text)
    print(jian_list)

    # 4. Count the occurrences of every distinct word.
    jian_dict = dict(Counter(jian_list))
    print(set(jian_dict))
    print(jian_dict)
    for word, count in jian_dict.items():
        print(word, count)

    # 5. Sort by descending count.
    print(list(jian_dict.items()))
    word_count_list = count_words(jian_list)
    print(word_count_list)

    # 6. Show the most frequent words.
    for entry in top_words(word_count_list):
        print(entry)

    # 7. Save the full ranking.
    # NOTE(review): the file is opened in append mode, as in the original,
    # so every run adds another copy of the ranking -- confirm that this is
    # intended rather than overwriting with mode='w'.
    with open(REPORT_PATH, mode='a', encoding='utf-8') as report_file:
        report_file.write(format_report(word_count_list))


if __name__ == '__main__':
    main()
词频统计
猜你喜欢
转载自www.cnblogs.com/childish/p/9167859.html
今日推荐
周排行