期末作业 词频统计

from collections import Counter

# Punctuation that is deleted outright before the text is split into words.
# Newlines are deliberately NOT in this list: deleting them would glue the
# last word of one line to the first word of the next. They are handled as
# ordinary whitespace by ``str.split()`` instead.
PUNCTUATION = ",.'"
_STRIP_TABLE = str.maketrans('', '', PUNCTUATION)


def clean_text(text: str) -> str:
    """Return *text* with commas, periods and apostrophes removed.

    All whitespace (including newlines) is left untouched.
    """
    return text.translate(_STRIP_TABLE)


def split_words(text: str) -> list:
    """Strip punctuation from *text* and split it into a list of words.

    Splitting is done on any run of whitespace, so repeated spaces, leading
    or trailing blanks and line breaks never produce empty-string "words".
    An empty or whitespace-only text yields an empty list.
    """
    return clean_text(text).split()


def count_words(words) -> dict:
    """Return a ``{word: occurrences}`` dict for the iterable *words*.

    Counting is a single pass; keys appear in order of first occurrence.
    Matching is case-sensitive.
    """
    return dict(Counter(words))


def rank_counts(counts: dict) -> list:
    """Return ``(word, count)`` pairs sorted by count, highest first.

    The sort is stable, so words with equal counts keep the order they
    have in *counts*.
    """
    return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)


def main(source_path: str = 'www.txt',
         output_path: str = 'wwwCount.txt',
         top_n: int = 20) -> None:
    """Count word frequencies in *source_path* and append them to *output_path*.

    Each intermediate stage is printed to stdout, followed by the *top_n*
    most frequent words. Every ``count word`` line is then appended to
    *output_path*, most frequent word first.
    """
    # ``with`` guarantees the file is closed even if reading fails.
    with open(source_path, mode='r', encoding='utf-8') as source_file:
        raw_text = source_file.read()
    print(raw_text)

    cleaned_text = clean_text(raw_text)
    print(cleaned_text)

    words = cleaned_text.split()
    print(words)
    print(words.count('www'))

    print(set(words))

    counts = count_words(words)
    # Printed once, after counting has finished, rather than once per word.
    print(counts)
    for word, count in counts.items():
        print(word, count)

    print(list(counts.items()))
    ranked = rank_counts(counts)
    print(ranked)

    # Slicing copes with texts that have fewer than ``top_n`` distinct words.
    for entry in ranked[:top_n]:
        print(entry)

    # NOTE(review): append mode is kept from the original script, so running
    # it repeatedly accumulates duplicate results in the output file.
    # Switch to mode='w' if each run should replace the previous output.
    with open(output_path, mode='a', encoding='utf-8') as count_file:
        count_file.writelines(
            '{} {}\n'.format(count, word) for word, count in ranked
        )


if __name__ == '__main__':
    main()

猜你喜欢

转载自www.cnblogs.com/guangzhoushangxueyuan121/p/9206189.html