Previous: [Python3 crawler (4)] [urllib.request module] [SSL authentication + cookies (string type conversion, session)]
++++++++++ start line+++++++ +++++++++++
Article Directory
1. Regular expressions
1.1 Mind Map
For details, please see: https://blog.csdn.net/make164492212/article/details/51699545
1.2 Greedy mode and non-greedy mode
re.py
import re
# Greedy vs. non-greedy matching.
# Quantifiers are greedy by default: they consume as much text as possible,
# so the match extends to the LAST possible terminator. Appending `?` makes
# the quantifier non-greedy, so it stops at the FIRST terminator instead.
one = "mdfsdsfffdsn12345656n"
two = "2.5"
# NOTE: this is not a raw string, so "\b" is the backspace character here.
three = "a\b"

# By convention the compiled regular expression is stored in `pattern`.

# Greedy mode:
# pattern = re.compile('m(.*)n')
# result = pattern.findall(one)  # findall returns a list
# print(result)
# ['dfsdsfffdsn12345656']

# Non-greedy mode:
# pattern = re.compile('m(.*?)n')
# result = pattern.findall(one)
# print(result)
# ['dfsdsfffds']

# NOTE(review): the unescaped "." matches any character, not only a literal
# dot; presumably this example is about escaping (r'2\.5') -- confirm.
# pattern = re.compile('2.5')
# result = pattern.findall(two)
# print(result)
# ['2.5']

# In a raw-string regex, \b is a word-boundary assertion.
# pattern = re.compile(r'a\b')
# result = pattern.findall(three)
# print(result)
# ['a']
1.3 Matching
re2.py
import re
# "." matches any character except the newline "\n".
one = (
    "\n"
    "msfdsdffdsdfsn\n"
    "1234567778888N\n"
)

# Capture the characters between "m" and "n".
# pattern = re.compile('m(.*)n')
# result = pattern.findall(one)
# print(result)
# Without flags "." does not cross the newline, and matching is strictly
# case-sensitive, so only the first line contributes:
# ['sfdsdffdsdfs']

# With flags:
#   re.S -- "." also matches newlines
#   re.I -- matching ignores case
# pattern = re.compile('m(.*)n', re.S | re.I)
# result = pattern.findall(one)
# print(result)
# ['sfdsdffdsdfsn\n1234567778888']
1.4 Digits-only regex
re3.py
import re
# Digits-only regex: \d matches a single decimal digit, + repeats it one or
# more times, and ^...$ anchors the pattern to the start and end of the string.
# The pattern is a raw string so that "\d" is passed to the regex engine
# unchanged; in a normal string literal "\d" is an invalid escape sequence
# and triggers a warning on recent Python versions.
pattern = re.compile(r'^\d+$')
one = '234'
# Checking whether a string matches:
# match() tries once, starting at the beginning of the string, and returns a
# Match object on success or None on failure.
result = pattern.match(one)
# NOTE: .group() would raise AttributeError if `one` did not match (result is None).
print(result.group())
# 234
1.5 Range operations
re4.py
import re
# Character classes and ranges.
one = "7893452"

# Match any one of the characters 1, 2 or 3:
# pattern = re.compile('[123]')
# ['3', '2']

# Match any single character in the range 1-9:
pattern = re.compile(r"[1-9]")
result = pattern.findall(one)
print(result)
# ['7', '8', '9', '3', '4', '5', '2']
1.6 Regex methods
re5.py
import re
one = 'abc 123'
# Raw string so that "\d" reaches the regex engine unchanged; in a normal
# string literal "\d" is an invalid escape sequence and triggers a warning
# on recent Python versions.
patter = re.compile(r'\d+')
# match: matches only at the beginning of the string, at most once
result = patter.match(one)
# None
# search: scans from any position and returns the first match
# result = patter.search(one)
# <re.Match object; span=(4, 7), match='123'>
# findall: collects every non-overlapping match and returns them as a list
# result = patter.findall(one)
# ['123']
# sub: replaces each match with the given string
# result = patter.sub('#', one)
# abc #
# split: splits the string at each match of the pattern
# patter = re.compile(' ')
# result = patter.split(one)
# ['abc', '123']
print(result)
++++++++++End line++++++++++++++++++