Python基础——正则表达式

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_34120459/article/details/87883874

Python基础——正则表达式

一、什么是正则表达式?

正则表达式其实就是一个用来匹配和提取字符串的工具
简单来说就是:在一堆东西里面提取我们想要的内容
正则表达式的模块是:re

1、re.findall():将符合规则的字符串以列表形式返回
import re
# Text to search and the literal pattern to look for.
s = 'python123'
literal_pattern = "python"
# re.findall() collects every non-overlapping match into a list of strings.
r = re.findall(pattern=literal_pattern, string=s)     # ['python']
print(r)

二、元字符:. ^ $ {} * + ? | [ ] ( ) \

1、. 通配符:匹配除换行符 \n 以外的任意单个字符
import re
# Three samples: no terminator, a trailing newline, a trailing carriage return.
s1, s2, s3 = "python123", "puthon123\n", "puthon123\r"
any_char = "."

# By default "." matches every character except "\n" ("\r" is still matched).
r1, r2, r3 = (re.findall(any_char, text) for text in (s1, s2, s3))
# With the re.S flag "." matches every character, the newline included.
r4 = re.findall(any_char, s2, flags=re.S)

for result in (r1, r2, r3, r4):
    print(result)

'''
运行结果:
['p', 'y', 't', 'h', 'o', 'n', '1', '2', '3']
['p', 'u', 't', 'h', 'o', 'n', '1', '2', '3']
['p', 'u', 't', 'h', 'o', 'n', '1', '2', '3', '\r']
['p', 'u', 't', 'h', 'o', 'n', '1', '2', '3', '\n']
'''
2、^ 脱字符:匹配字符串的开头;加上修饰符 re.M 后,还会匹配每一行的开头
import re
s1 = "love123\nloveyou\nhhh"
print(s1)
line_start_love = "^love"
# First pass (no flag): "^" anchors only at the very start of the string.
# Second pass (re.M): "^" also anchors at the start of every line.
for mode in (0, re.M):
    r = re.findall(line_start_love, s1, flags=mode)
    print(r)

'''
运行结果:
love123
loveyou
hhh
['love']
['love', 'love']
'''
3、$ 结束位置:匹配字符串的结尾;加上修饰符 re.M 后,还会匹配每一行的结尾
import re
s1 = "python\npyt\nthon"
print(s1)
# Each entry pairs a pattern with the flags used for that search.
searches = (
    ("pyt$", 0),        # "$" anchors at the end of the string only -> []
    ("thon$", re.M),    # "$" anchors at the end of every line -> ['thon', 'thon']
)
for pattern, mode in searches:
    r = re.findall(pattern, s1, flags=mode)
    print(r)

'''
运行结果:
python
pyt
thon
[]
['thon', 'thon']
'''
4、 * + ?:匹配前面的表达式次数分别为(0-n)(1-n)(0-1)
import re
s1 = "z\nzo\nzoo"
# Pre-compile one pattern per quantifier, all with the re.M flag.
zero_or_more, one_or_more, zero_or_one = (
    re.compile(expr, re.M) for expr in ("zo*", "zo+", "zo?")
)
r = zero_or_more.findall(s1)
print(r)    # "o" repeated 0..n times: ['z', 'zo', 'zoo']
r = one_or_more.findall(s1)
print(r)    # "o" repeated 1..n times: ['zo', 'zoo']
r = zero_or_one.findall(s1)
print(r)    # "o" repeated 0..1 times: ['z', 'zo', 'zo']
5、{}控制表达式次数
import re
s1 = "z\nzo\nzoo"
# Each shorthand quantifier is followed by its explicit {m,n} spelling;
# both members of a pair give the same result.
quantifier_patterns = [
    "zo*", "zo{0,}",      # zero or more "o" -> ['z', 'zo', 'zoo']
    "zo+", "zo{1,}",      # one or more "o"  -> ['zo', 'zoo']
    "zo?", "zo{0,1}",     # zero or one "o"  -> ['z', 'zo', 'zo']
    "zo{2}",              # exactly two "o"  -> ['zoo']
]
for pattern in quantifier_patterns:
    r = re.findall(pattern, s1, flags=re.M)
    print(r)
6、[ ]字符组:控制的是匹配内容
import re
s = "test\nTesting\nzoo"


def _find_and_print(pattern):
    """Search s for pattern with re.M, print the matches and return them."""
    found = re.findall(pattern, s, flags=re.M)
    print(found)
    return found


# A set matches one character out of those listed.
r = _find_and_print("[eio]")
# A range inside a set: e-o stands for efghijklmno.
r = _find_and_print("[e-o]")     # ['e', 'e', 'i', 'n', 'g', 'o', 'o']
# "^" in front of the set is the line-start anchor; no line begins with e/i/o.
r = _find_and_print("^[eio]")    # []
# "^" as the first character inside the set negates it.
r = _find_and_print("[^eio]")    # ['t', 's', 't', '\n', 'T', 's', 't', 'n', 'g', '\n', 'z']
r = _find_and_print("[^e-o]")    # ['t', 's', 't', '\n', 'T', 's', 't', '\n', 'z']
7、| 选择元字符:匹配 | 左边或右边的任意一个表达式
import re
s = "p\npython\nhello"
# "|" is alternation: match the expression on its left OR the one on its right.
r1 = re.findall("p|hello", s, re.M)  # ['p', 'p', 'hello']
print(r1)
# To limit the alternation to part of a pattern, wrap it in a group.
# (?:...) is a non-capturing group, so findall() still returns whole matches.
# Do not write "[h|l]o": inside [] the "|" is a literal character, so that
# pattern would also match the text "|o".
r2 = re.findall("(?:h|l)o", s, re.M)
print(r2)     # ['ho', 'lo']
8、分组元字符:() 将括号内的表达式定义为组
import re
s = "p\npoo\nhool"
# [ph] is a character set that matches one "p" or one "h". No "|" is needed
# inside []; there it would be a literal "|" character, not alternation.
r1 = re.findall("[ph]o*", s, re.M)  # ['p', 'poo', 'hoo']
print(r1)
# When the pattern contains a group, findall() returns the text captured by
# the group instead of the whole match. The whole matches are still
# 'p', 'poo', 'hoo'; the group (o*) captured '', 'oo', 'oo' respectively.
r2 = re.findall("[ph](o*)", s, re.M)
print(r2)      # ['', 'oo', 'oo']
9、转义元字符

取消字符串的转义:在字符串前面加 r(原始字符串),字符串中的 \ 不再被 Python 当作转义符
取消正则元字符的特殊含义:在元字符前面加 \,使其只匹配该字符本身

  • 取消字符串转义:
import re
# s1 is a normal literal, so each \t becomes a real tab character.
# s2 is a raw literal (r prefix), so each \t stays a backslash followed by "t".
s1, s2 = "p\tpython\thello", r"p\tpython\thello"

# Prints s1 with real tabs between the words, then s2 as p\tpython\thello.
print(s1, s2, sep="\n")
  • 取消正则语法的转义,加\
import re
s = "z\nzo\nzoo"
print(s)
# "." is the wildcard: it matches every character except the newline.
r1 = re.findall(".", s)
# re.M only changes where ^ and $ anchor; it has no effect on ".",
# so this result is the same as r1.
r2 = re.findall(".", s, re.M)
# A backslash removes the special meaning: r"\." matches a literal dot only.
# The r prefix keeps Python from reading "\." as a string escape (a non-raw
# "\." is an invalid escape sequence and triggers a warning).
# The sample text contains no dot, so nothing is found.
r3 = re.findall(r"\.", s, re.M)
print(r1)     # ['z', 'z', 'o', 'z', 'o', 'o']
print(r2)     # ['z', 'z', 'o', 'z', 'o', 'o']
print(r3)     # []

猜你喜欢

转载自blog.csdn.net/qq_34120459/article/details/87883874