全天课笔记-210181125
写一个类,能够统计某个文件的纯数字字符个数、非空白字符个数、空白字符个数和行数
能够读取文件中的某一行
通过继承方式,增加一个方法,打印所有的统计信息
import os.path
class FileInfo(object):
    """Collect simple statistics about a text file.

    Reports the number of ASCII digit characters, non-whitespace
    characters, whitespace characters and lines of the file stored at
    ``file_path``. Every counting method re-reads the file, so results
    always reflect its current content.
    """

    def __init__(self, file_path, encoding_type="utf-8"):
        """Store the file location and the encoding used to read it.

        Args:
            file_path: Path of the file to analyse.
            encoding_type: Text encoding passed to ``open``.

        While the path does not exist, the user is prompted on stdin for
        a replacement, so construction blocks until an existing path is
        supplied.
        """
        self.file_path = file_path
        self.encoding_type = encoding_type
        while not os.path.exists(self.file_path):
            self.file_path = input(
                "实例化的文件路径不存在,请重新输入:")

    def get_file_content(self):
        """Return the whole file decoded with ``self.encoding_type``."""
        with open(self.file_path, encoding=self.encoding_type) as fp:
            return fp.read()

    def count_number_str(self):
        """Return how many characters are ASCII digits ("0" to "9")."""
        content = self.get_file_content()
        return sum(1 for ch in content if "0" <= ch <= "9")

    def count_not_space_str(self):
        """Return how many characters are not whitespace (``str.isspace``)."""
        content = self.get_file_content()
        return sum(1 for ch in content if not ch.isspace())

    def count_space_str(self):
        """Return how many characters are whitespace (``str.isspace``)."""
        content = self.get_file_content()
        return sum(1 for ch in content if ch.isspace())

    def count_lines(self):
        """Return the number of lines in the file.

        Each newline terminates one line; a final line without a trailing
        newline is counted too. An empty file has 0 lines. (Counting the
        pieces of ``split("\\n")`` would report one line too many for a
        file that ends with a newline, and 1 for an empty file.)
        """
        content = self.get_file_content()
        count = content.count("\n")
        if content and not content.endswith("\n"):
            # Last line has text but no terminating newline.
            count += 1
        return count
class Advanced_FileInfo(FileInfo):
    """File statistics plus single-line lookup and a printed summary."""

    def __init__(self, file_path, encoding_type="utf-8"):
        """Initialise the base class with the given path and encoding.

        Args:
            file_path: Path of the file to analyse.
            encoding_type: Text encoding used when reading the file.
        """
        # Forward the caller's encoding instead of a hard-coded "utf-8",
        # otherwise a non-default ``encoding_type`` is silently ignored.
        FileInfo.__init__(self, file_path, encoding_type=encoding_type)

    def get_content_by_line_num(self, line_number):
        """Return the text of line ``line_number`` (1-based), or None.

        Lines are the pieces of the file content split on "\\n", without
        the newline itself.

        Args:
            line_number: 1-based line index.

        Returns:
            The line text, or None when ``line_number`` is smaller than 1,
            beyond the last line, or not usable as an index.

        Errors raised while reading the file propagate to the caller, as
        they do for the counting methods.
        """
        lines = self.get_file_content().split("\n")
        try:
            index = line_number - 1
            if index < 0:
                # Without this guard 0 or negative numbers would index
                # from the end of the list and return the wrong line.
                return None
            return lines[index]
        except (IndexError, TypeError):
            return None

    def print_file_info(self):
        """Print the digit, non-whitespace, whitespace and line counts."""
        print("文件的统计信息如下:")
        print("文件中包含的数字数量:%s" % self.count_number_str())
        print("文件中包含的非空白字符数量:%s" % self.count_not_space_str())
        print("文件中包含的空白字符数量:%s" % self.count_space_str())
        print("文件中包含的行数:%s" % self.count_lines())
# Demo run against the hard-coded path e:\a.txt.
fi = Advanced_FileInfo("e:\\a.txt")
first_line = fi.get_content_by_line_num(1)
print("获取第一行的文件内容:", first_line)
fi.print_file_info()
正则表达式
>>> import re
>>> re.match(r".","abc")
<_sre.SRE_Match object; span=(0, 1), match='a'>
>>> re.match(r"..","abc")
<_sre.SRE_Match object; span=(0, 2), match='ab'>
>>> re.match(r".....","abc")
>>> print(re.match(r".....","abc"))
None
>>> print(re.match(r"...","a\nc"))
None
>>> print(re.match(r"...","a\nc",re.DOTALL))
<_sre.SRE_Match object; span=(0, 3), match='a\nc'>
>>> """I AM
... FSDFDS
... FSDW
... """
'I AM\nFSDFDS\nFSDW\n'
>>>
>>> print(re.match(r"[^abc]","hxxx"))
<_sre.SRE_Match object; span=(0, 1), match='h'>
>>> print(re.search(r"abc","sssssabc"))
<_sre.SRE_Match object; span=(5, 8), match='abc'>
>>> print(re.search(r"^abc","sssssabc"))
None
>>> print(re.match(r"\d","123"))
<_sre.SRE_Match object; span=(0, 1), match='1'>
>>> print(re.match(r"\d+","123"))
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>> print(re.match(r"\d*","123"))
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>>
>>> print(re.match(r"\d*","123"))
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>> print(re.match(r"\d*","a123"))
<_sre.SRE_Match object; span=(0, 0), match=''>
>>> print(re.match(r"\d*?","123"))
<_sre.SRE_Match object; span=(0, 0), match=''>
>>> print(re.match(r"\d+?","123"))
<_sre.SRE_Match object; span=(0, 1), match='1'>
>>> re.search(r"\d+","abc123dee").group()
'123'
>>> sorted(['5', '12', '123', '1234'],key=lambda x:len(x))[-1]
'1234'
>>> map(len,set(["a","abc"]))
<map object at 0x0000000002606198>
>>> list(map(len,set(["a","abc"])))
[1, 3]
>>> re.findall(r"\d+","1a12b123c1234d")
['1', '12', '123', '1234']
>>> re.findall(r"[a-zA-Z]+","1a12b123c1234d")
['a', 'b', 'c', 'd']
>>> re.findall(r"[a-zA-Z]+","1ab12bc123cd1234dA")
['ab', 'bc', 'cd', 'dA']
>>> re.findall(r"[A-Z]+[a-z]+|[a-z]+","ABBBossssAA abc")
['ABBBossss', 'abc']
>>> re.findall(r"[A-Z]+[a-z]*|[a-z]+","ABBBossssAA abc ABC")
['ABBBossss', 'AA', 'abc', 'ABC']
>>> re.search(r"\s","ab cd")
<_sre.SRE_Match object; span=(2, 3), match=' '>
>>> re.search(r"\s+","ab\t \r\ncd")
<_sre.SRE_Match object; span=(2, 6), match='\t \r\n'>
>>> re.findall(r"\S+","ab cd\t ef\nhi")
['ab', 'cd', 'ef', 'hi']
>>> "".join(re.findall(r"\S+","ab cd\t ef\nhi"))
'abcdefhi'
>>> re.search(r"\w+","aaaZAW0123_")
<_sre.SRE_Match object; span=(0, 11), match='aaaZAW0123_'>
>>> re.search(r"\w+","aaaZAW0123_").group()
'aaaZAW0123_'
>>> re.search(r"\W+","aaaZAW0123_-").group()
'-'
>>> re.search(r"\d?","a7").group()
''
? 匹配前一个元素0次或1次
* 匹配前一个元素0次或多次
+ 匹配前一个元素1次或多次
>>> re.search(r"\d{3}","123456789").group()
'123'
>>> re.search(r"\d{1,3}","123456789").group()
'123'
>>> re.search(r"\d{1,3}?","123456789").group()
'1'
>>> re.search(r"^abc","abcdddabc") #从开头匹配^
<_sre.SRE_Match object; span=(0, 3), match='abc'>
>>> re.search(r"^abc","dddabc")
>>> re.search(r"^\d+","133dddabc")
<_sre.SRE_Match object; span=(0, 3), match='133'>
>>> re.search(r"\d+$","133dddabc5555")
<_sre.SRE_Match object; span=(9, 13), match='5555'>
>>> re.search(r"^123$","123")
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>> re.search(r"^123$","123sss") #不匹配;^123$ 近似于 \A123\Z(区别:$ 还能匹配字符串末尾换行符之前的位置)
>>> re.search(r"^123$","ss123")
>>> re.search(r"\A123\Z","123")
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>> re.search(r"\d(\D+)\d","1abc3").group(1)
'abc'
>>> re.search(r"(\d)(\D+)(\d)","1abc3").group(1)
'1'
>>> re.search(r"(\d)(\D+)(\d)","1abc3").group(2)
'abc'
>>> re.search(r"(\d)(\D+)(\d)","1abc3").group(2)
'abc'
>>> re.search(r"(\d)(\D+)(\d)","1abc3").group(3)
'3'
>>> pattern = re.compile(r"\d+")
>>> pattern.search("abc123")
<_sre.SRE_Match object; span=(3, 6), match='123'>
>>> result = pattern.search("123ddddd")
>>> if result:
... print("匹配到了")
... else:
... print("没有匹配到!")
...
匹配到了
匹配由3个数字和3个字母组成的字符串:可以是3个数字开头,也可以是3个字母开头
>>> re.match(r"\d{3}[a-zA-Z]{3}$|[a-zA-Z]{3}\d{3}$","abc123")
<_sre.SRE_Match object; span=(0, 6), match='abc123'>
>>> re.match(r".","\ndb",re.DOTALL)
<_sre.SRE_Match object; span=(0, 1), match='\n'>
>>> re.DOTALL
<RegexFlag.DOTALL: 16>
>>> re.match(r".","\ndb",16)
<_sre.SRE_Match object; span=(0, 1), match='\n'>
>>> re.match(r"ABc","abc",re.I) #re.I 忽略大小写
<_sre.SRE_Match object; span=(0, 3), match='abc'>
>>> re.match(r"ABc","abc")
# Two positional word groups followed by the named group "sign".
p = re.compile(r'(\w+) (\w+)(?P<sign>.*)', flags=re.DOTALL)
# Number of capturing groups defined by the pattern.
group_total = p.groups
print("p.groups: ", group_total)
>>> pattern =re.compile(r"abc")
>>> pattern.match("123abc")
>>> pattern.match("123abc",3)
<_sre.SRE_Match object; span=(3, 6), match='abc'>
>>> pattern.match("123abc",3,6)
<_sre.SRE_Match object; span=(3, 6), match='abc'>
>>> re.findall(r"\d+","1a2b3c")
['1', '2', '3']
>>> re.findall(r"[a-z](\d+)","1a2b3c")
['2', '3']
>>> re.findall(r"([a-z])(\d+)","1a2b3c")
[('a', '2'), ('b', '3')]
#有分组的情况下,只返回分组里面的内容,将分组通过元组的形式返回
>>> re.findall(r"(([a-z])(\d+)([a-z]))","a1ab2bc3c")
[('a1a', 'a', '1', 'a'), ('b2b', 'b', '2', 'b'), ('c3c', 'c', '3', 'c')]
>>> s="a1a\nb2b\nc2c\n"
>>> re.search(r"[a-z]$",s)
<_sre.SRE_Match object; span=(10, 11), match='c'>
>>> re.search(r"[a-z]$",s,re.M)
<_sre.SRE_Match object; span=(2, 3), match='a'>
>>> re.findall(r"[a-z]$",s)
['c']
>>> re.findall(r"[a-z]$",s,re.M)
['a', 'b', 'c']
# Print each run of ASCII letters followed by a single space.
for word_match in re.finditer(r'[A-Za-z]+','one12two34three56four'):
    print(word_match.group(), end=" ")
p = re.compile(r'\d+')
# maxsplit=0 means no limit: split at every run of digits.
resList = p.split('one1two2three3four4', maxsplit=0)
print(resList)
>>> resList = p.split("a 2 b 2 c 5 d",2) #指定切割次数
>>> print(resList)
['a ', ' b ', ' c 5 d']
>>> "aabbcc".replace("bb","**")
'aa**cc'
re.sub / re.subn:正则替换(re 模块中没有名为 substitute 的函数)
>>> re.sub(r"\d+","**","aa11bb22cc")
'aa**bb**cc'
>>> re.subn(r"[ \t\r]+","","aa 11b b22 \n \t cc")
('aa11bb22\ncc', 4)
p = re.compile(r'(\w+) (\w+)')
s = 'i say, hello world!'
# \2 and \1 are group references: the second and the first group,
# so each matched word pair is written back in swapped order.
swapped = p.sub(r'\2 \1', s)
print(swapped)


def func(m):
    """Return the matched pair with the first word title-cased.

    When ``repl`` is a callable, ``sub`` passes it each match object ``m``
    and uses the returned string as the replacement.
    """
    first, second = m.group(1), m.group(2)
    print("group1:", first)
    print("group2:", second)
    return " ".join((first.title(), second))


print(p.sub(func, s))
import re
def multiply(m):
    """Return the integer matched by ``m`` doubled, as a string.

    Args:
        m: Match object whose whole match (group 0) is a run of digits.
    """
    return str(int(m.group(0)) * 2)


# With a callable as the replacement, sub() calls it once per match and
# substitutes the returned string. The pattern is a raw string so that
# "\d" is not treated as an (invalid) string escape sequence.
result = re.sub(r"\d+", multiply, "10 20 30 40 50")
print(result)
>>> re.search(r"(?P<num>\d+)","123")
<_sre.SRE_Match object; span=(0, 3), match='123'>
>>> re.search(r"(?P<num>\d+)","123").group(1)
'123'
>>> re.search(r"(?P<num>\d+)","123").group("num")
'123'
>>> re.search(r"(?P<num>\d+) (?P=num)","123 123").group()
'123 123'
>>> re.search(r"(?P<num>\d+) (?P=num)","123 456").group()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute 'group'
>>> re.search(r"(\d+) \1","123 123").group(1)
'123'
>>> re.search(r"(\d+) \1","123 456").group(1)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'NoneType' object has no attribute 'group'
>>> re.search(r"((\d+) (\d+))","123 456").group()
'123 456'
>>> re.search(r"((\d+) (\d+))","123 456").group(1)
'123 456'
>>> re.search(r"((\d+) (\d+))","123 456").group(2)
'123'
>>> re.search(r"((\d+) (\d+))","123 456").group(3)
'456'
m = re.search(r'(\w+)! (\w+) (\w+)','HMan! gloryroad train')
# expand() fills the template's \1, \2, \3 with the text of the
# corresponding groups of this match (label typo "resut" corrected).
print(m.expand(r'result:\3 \2 \1'))
group(0) 和 group() 等价,都返回整个正则匹配到的内容
group(1) 返回第一个分组匹配到的内容
import re
# Verbose mode: whitespace and "#" comments inside the pattern are ignored.
a = re.compile(r"""\d+ # 匹配至少1个连续的数字,自定义注释
\. # 匹配点(.)
\d* # 匹配数字至少0个""", flags=re.VERBOSE)
# Compact spelling of the same expression as ``a``.
b = re.compile(r"\d+\.\d*")
first_decimal = a.search("test12.58 2.0 abc 3.8")
print(first_decimal.group())
>>> re.search(r"((?<=abc)\d+)","abc123deb")
<_sre.SRE_Match object; span=(3, 6), match='123'>
>>> re.search(r"((?<=abc)\d+)","abc123deb").group()
'123'
>>> re.search(r"(\d+(?=abc))","xbc123abc").group()
'123'
>>> re.search(r"(\d+(?=abc))","xbc123abc").group()
'123'
>>> re.search(r"(?<=xbc)(\d+(?=abc))","xbc123abc").group()
'123'
>>> re.search(r"(?<!xbc)\d+","abc123abc").group()
'123'
>>> re.search(r"(?<!xbc)\d+","xbc123abc").group()
'23'
>>> re.search(r"(?<!xbc)\d+?","xbc123abc").group()
'2'
>>> re.search(r"\d+(?!xbc)","123abc").group()
'123'
>>> re.search(r"\d+(?!xbc)","123xbc").group()
'12'