# Strip HTML from text with Python, also removing newlines, spaces, tabs, JS and CSS.
from bs4 import BeautifulSoup as bs
# Characters removed from the extracted text: space, newline, carriage
# return and tab -- the same set the original chained ``.replace()`` calls
# removed, now stripped in a single pass.
# NOTE(review): the original chain repeated the plain-space replacement three
# times; the two extra ones may have been non-breaking (U+00A0) or full-width
# (U+3000) spaces lost in a copy-paste. Add them here if that was intended.
_WHITESPACE_TABLE = str.maketrans('', '', ' \n\r\t')


def strip_whitespace(text):
    """Return *text* with every space, newline, carriage return and tab removed."""
    return text.translate(_WHITESPACE_TABLE)


def html_to_text(content):
    """Return the visible text of the HTML string *content*.

    ``<script>`` and ``<style>`` elements are removed from the parsed tree
    first so that their JS/CSS source does not end up in the output, then
    the remaining text is extracted and stripped of spaces, newlines,
    carriage returns and tabs.
    """
    soup = bs(content, "lxml")
    # Detach script/style elements before get_text(); otherwise their raw
    # contents would be included in the extracted text.
    for tag in soup.find_all(['script', 'style']):
        tag.extract()
    return strip_whitespace(soup.get_text())


if __name__ == '__main__':
    content = 'xxxx'
    print(html_to_text(content))