匹配url链接

# encoding=utf-8
__author__ = "zhangyue"

import re

# Regex for http/https URLs.
# NOTE: inside the third character class, "$-_" is a *range* (ASCII 0x24-0x5F),
# not three literal characters. That range is what allows "/", ":", "?" and "="
# to match, but it also admits characters such as "<", ">" and "'". The pattern
# is kept unchanged so the set of matched URLs stays the same.
pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')    # match pattern


def extract_urls(text):
    """Return every substring of *text* matched by ``pattern``, in order."""
    return pattern.findall(text)


def main(source_path='sweb.html', target_path="text-urls.txt"):
    """Write every URL found in *source_path* to *target_path*, one per line.

    Args:
        source_path: Path of the UTF-8 encoded text/HTML file to scan.
        target_path: Path of the file to (over)write with the matched URLs.

    Returns:
        The list of matched URLs, in the order they appear in the input.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If the input is not valid UTF-8.
    """
    # Read the input completely before touching the output, so a missing or
    # undecodable input never leaves a truncated output file behind.
    with open(source_path, 'r', encoding='utf-8') as fi:
        txt = fi.read()

    urls = extract_urls(txt)

    # The pattern only matches ASCII characters, so an explicit UTF-8 encoding
    # produces the same bytes while no longer depending on the platform default.
    with open(target_path, "w", encoding='utf-8') as fo:
        fo.writelines(item + "\n" for item in urls)

    return urls


if __name__ == "__main__":
    main()

猜你喜欢

转载自blog.csdn.net/qq_34788903/article/details/89971175