collections模块,re模块

1.collections模块

1.namedtuple
from collections import namedtuple
Point = namedtuple('Point',['x', 'y'])
print(type(Point))
p = Point(1, 2)
print(type(p))
print(p)
print(p[0])
print(p[1])
print(p.x)
print(p.y)
import time
struct_time = time.strptime('2019-7-2','%Y-%m-%d')
print(struct_time)
print(struct_time[0])
print(struct_time.tm_yday)

struct_time = namedtuple('struct_time',['tm_year', 'tm_mon', 'tm_mday'])
st = struct_time(2019, 7, 2)
print(st)
2.deque: 类似于列表的一种容器型数据(双端队列),在两端插入元素、删除元素效率高.
from collections import deque
q = deque(['a', 1, 'c', 'd'])
print(q)
q.append('e')
q.append('f')
print(q)
q.appendleft('ly')
q.appendleft('dsb')
print(q)
q.pop()    =>    默认删除最后一个
q.popleft()
print(q)
print(q[0])   =>   按照索引取值
del q[2]    
print(q)     =>    按照索引删除任意值
q.insert(1,'2')
print(q)
3.OrderedDict
d = dict([('a', 1), ('b', 2), ('c', 3)])
print(d)
from collections import OrderedDict
od = OrderedDict([('a', 1), ('b', 2), ('c', 3)])
print(od)
print(od['a'])
print(od['b'])
4.defaultdict    #默认值字典
from collections import defaultdict
l1 = [11, 22, 33, 44, 55, 77, 88, 99]
dic = {}
for i in l1:
    if i < 66:
        if 'key1' not in dic:
            dic['key1'] = []
        dic['key1'].append(i)
    else:                             =>    以前方法
        if 'key2' not in dic:
            dic['key2'] = []
        dic['key2'].append(i)
print(dic)     

l1 = [11, 22, 33, 44, 55, 77, 88, 99]
dic = defaultdict(list)
for i in l1:
    if i < 66:
        dic['key1'].append(i)
    else:
        dic['key2'].append(i)
print(dic)   =>   defaultdict(<class 'list'>, {'key1': [11, 22, 33, 44, 55], 'key2': [77, 88, 99]})

dic = defaultdict(list) 
dic['1']
dic['2']
print(dic)   =>   defaultdict(<class 'list'>, {'1': [], '2': []})

dic = dict.fromkeys('12',[])
print(dic)

dic = defaultdict(lambda :None)  #需要一个可回调的
for i in range(1,4):
    dic[i]
print(dic)   =>   defaultdict(<function <lambda> at 0x00000251F4221EA0>, {1: None, 2: None, 3: None})
5.Counter
from collections import Counter
c = Counter('flkjdasffdfakjsfdsaklfdsalf')  # 计数器
print(c)   =>   Counter({'f': 7, 'd': 4, 'a': 4, 's': 4, 'l': 3, 'k': 3, 'j': 2})
print(c['f'])  =>  7

2.re模块

import re
1.findall
正则表达式: 从一大堆字符串中,找出你想要的字符串,关键在于对你想要的这个字符串进行精确的描述
# 单个字符匹配
\w 数字字母下划线中文
\W 非数字字母下划线中文
print(re.findall('\w', '太白jx 12*() _'))
print(re.findall('\W', '太白jx 12*() _'))
\s  匹配空白字符,如 空格 \t \n
\S  匹配非空白字符,即 空格 \t \n 以外的字符
print(re.findall('\s','太白barry*(_ \t \n'))
print(re.findall('\S','太白barry*(_ \t \n'))
\d 匹配所有的数字
\D 匹配所有的非数字
print(re.findall('\d','1234567890 alex *(_'))
print(re.findall('\D','1234567890 alex *(_'))
\A ^从开头开始匹配
print(re.findall('\Ahello','hello hello 太白 hell'))
print(re.findall('^hello','hello hello 太白 hell'))
\Z 匹配字符串的结尾(被匹配的内容必须位于字符串末尾)
$ 匹配字符串的结尾(被匹配的内容必须位于字符串末尾)
print(re.findall('太白金星\Z','fjkdsla太白金星'))
print(re.findall('金星$','fjkdsla太白金星'))
# 元字符匹配
如果匹配成功,光标则移到匹配成功的最后一个字符之后继续匹配;如果匹配未成功,光标则向后移动一位再次匹配
. 匹配任意1个字符(默认不匹配换行符\n)
print(re.findall('a.b','aaabbb'))
? 匹配0个或者1个由左边字符定义的片段
print(re.findall('a?b', 'sb ab aabb'))
* 匹配0个或者多个左边字符表达式   满足贪婪匹配
print(re.findall('a*b','aaaab ab b'))
+ 匹配1个或者多个左边字符表达式   满足贪婪匹配
print(re.findall('a+b','aaab ab b'))
{m,n}  匹配m个至n(n能取到)个左边字符表达式   满足贪婪匹配
print(re.findall('a{1,5}b', 'ab aab aaab aaaab aaaaab aaaaaab'))
.*  贪婪匹配   尽可能多地匹配(默认不能跨越换行符\n)
print(re.findall('a.*b','aab abbliye aaab abbb aa#b'))
.*? 非贪婪匹配  尽可能少地匹配(默认不能跨越换行符\n)
print(re.findall('a.*?b','ab a#bbbbbb aaab'))
print(re.findall('a.*?b','a#bbbb\nabb'))
# []
print(re.findall('a[abc][bd]b', 'aabb aaabc abd acdbb'))
print(re.findall('a[0-9]b', 'a1b a3b aeb a*b arb a_b'))
print(re.findall('a[a-z]b', 'a1b a3b aeb a*b arb a_b'))
print(re.findall('a[A-Z]b', 'aAb a3b aEb a*b aRb a_b'))
print(re.findall('a[a-zA-Z]b', 'aab a3b aAb a*b aTb a_b'))
当你想匹配 - 时,要把它放在[]里面的最前面或者最后面
print(re.findall('a[-*$]b', 'a-b a$b a)b a*b '))
^ 在[]里面最前面代表取反
print(re.findall('a[^0-9]b', 'a1b a$b a5b a*b '))
当你想匹配 ^ 时,不能把它放在[]里面的最前面
print(re.findall('a[*^]b', 'a^b a$b a5b a*b '))
# ()
s = 'alex_sb wusir_sb ritian_sb 太白_nb yuanbao_sb dsb_sb'
print(re.findall('\w+_sb',s))
print(re.findall('(\w+)_sb',s))
() 分组里面加了?:  =>  返回整个匹配到的内容,而不是只返回组内的内容
print(re.findall('companies|company','Too many companies have gone bankrupt, and the next one is my company')) 
print(re.findall('compan(?:ies|y)','Too many companies have gone bankrupt, and the next one is my company'))
# |
print(re.findall('alex|太白|wusir', 'alex太白wusiraleeeex太太白odlb'))
2.search match
search  找到第一个符合条件的字符串就返回,返回一个匹配对象,通过 对象.group() 取出匹配到的内容;找不到则返回None
ret = re.search('sb|alex', 'alex sb alex sb barry 日天')
print(ret)
print(ret.group())
match  从字符串开头匹配,如果以符合条件的字符串开头则返回,否则返回None
ret = re.match('alex', 'alexfdskfd fjdsklalex gfdlgjfdlgjfggfjlgjfkdl')
print(ret)
print(ret.group())
3.split
print(re.split('[;, ~]','alex;wusir,太白 吴超~宝元'))
4.sub  替换
print(re.sub('barry', '太白', 'barry是最好的讲师,barry就是一个普通老师,请不要将barry当男神对待。'))
5.compile
obj = re.compile('\d{2}')
print(obj.search('fdsa12fds435454').group())
print(obj.findall('fjdskalf2134fkjsd324fdjsl545'))
6.finditer
ret = re.finditer('\d','54fjdkls4535lsdfj6776')
print(ret)
print(next(ret))
print(next(ret).group())
print(next(ret).group())
for i in ret:
    print(i.group())
练习
s1 = '''
时间就是1995-04-27,2005-04-27
1999-04-27 老男孩教育创始人
老男孩老师 alex 1980-04-27:1980-04-27
2018-12-08
'''
print(re.findall('\d\d\d\d-\d\d-\d\d',s1))
匹配一个qq账号: 从10000开始,第一位规定是非零数字,后面是任意数字,总长度为5到12位
print(re.findall('[1-9][0-9]{4,11}','3243242343244324'))

猜你喜欢

转载自www.cnblogs.com/wxl1025/p/11139600.html