自己动手做爬虫五

本文链接： https://blog.csdn.net/weixin_43999482/article/details/102147048

抛出自定义异常

#!/usr/bin/env python
#-*- coding: utf-8 -*-
#@Time:10/5/2019 1:41 PM
#@Author:chengwei
#@File:01-抛出自定义异常.py
#@Software:PyCharm


# Custom exception class
class CustomException(Exception):
    """Exception that carries the value which triggered it.

    Args:
        content: The offending value; it is embedded in the message
            returned by ``str()``.
    """

    def __init__(self, content):
        # Initialise the base Exception explicitly instead of bypassing it.
        super().__init__(content)
        self.content = content

    def __str__(self):
        """Return the description shown when the exception is raised."""
        # Wrap the value in a 1-tuple: with a bare ``% self.content`` a
        # tuple-valued content would be unpacked as several format arguments
        # and raise TypeError instead of producing the message.
        return "我是一个自定义异常，异常数据为%s" % (self.content,)

#这里的异常是一个系统抛出的异常，然后会指定异常信息
#a = 10
#del a
#print(a)

# Read one line from the user; anything other than "a" is rejected below.
content = input("其输入一个数据：")
if content != "a":
    # Raise the custom exception, passing the rejected input along
    raise CustomException(content)
    # Note: `raise` can only be used with exceptions

模块的介绍

#模块，一个.py文件，就是一个模块
#可以定义类，定义函数，定义全局变量，执行对应的功能代码

内置模块

# Built-in modules: modules that ship with Python itself, e.g. time and random
import time
import random

# Pick a random integer from 1 to 10 (both ends included)
result = random.randint(1,10)
# Pause for 2 seconds before printing the number
time.sleep(2)
print(result)

自定义模块

# Define a module-level (global) variable
g_num = 10
# Define functions
def show():
    """Print a fixed message identifying this function."""
    print("我是一个show函数")

def sum_num(num1,num2):
    """Return ``num1 + num2``."""
    return num1 + num2
# Show the module name: "__main__" when this file is run directly,
# otherwise the name it was imported under
print(__name__)
if __name__ == "__main__":
    # Only runs when this file is the main module (executed directly)
    # Quick self-test of the functions defined above
    show()
    print(sum_num(1,3))






# Custom module names follow the same rules as variable names
# (the naming rules for modules and variables are identical)
# Use the code provided by a module
# Main module: the file that is actually being executed

import first_module
# Access the module's global variable and call its function via the module name
print(first_module.g_num)
a = first_module.sum_num(2,3)
print(a)
# Show this file's own module name
print(__name__)

模块的一个导入

# import <module>
import first_module
# from <module> import <name>
from first_module import sum_num
# from <module> import * : imports all public names of the module
from first_module import *
# from <module> import * combined with __all__ to limit what gets imported
from first_module import *
# NOTE: __all__ only limits `from first_module import *` when it is defined
# inside first_module.py. Assigning it here, in the importing file, sets this
# file's own export list and does not affect the import above.
__all__ = ["g_num","show"]# list of exported names, not everything in the module

StringIO


# Write string data into memory
import io
# StringIO is used much like a file opened for writing and reading
str_io = io.StringIO()
# Write string data into the in-memory buffer
str_io.write("hello")
str_io.write("world")

# Fetch everything written so far
content = str_io.getvalue()
print(content)

# Move the file pointer back to the start of the buffer
str_io.seek(0)
# read() with no argument reads everything
# read(n): read at most n characters
result = str_io.read(5)
print(result)

BytesIO

from io import BytesIO
byte_io = BytesIO()
# Write binary data into memory (the text is encoded to UTF-8 bytes first)
byte_io.write("哈哈".encode("utf-8"))
# Fetch the raw bytes held in the buffer
data = byte_io.getvalue()
print(data)
# Decode the bytes back into text
content = data.decode("utf-8")
print(content)

序列化

# Serialization: save in-memory data to local storage so it can be kept persistently
import pickle  # general purpose: can serialize most Python objects
my_list = [{"name" : "张三","age" : 20},{"name" : "李思","age" : 20}]
# Serialize. The with-block closes the file even if dump() raises.
with open("my_list.md", "wb") as file:
    pickle.dump(my_list, file)

# Deserialize. Only unpickle data you trust; here the file was written just above.
with open("my_list.md", "rb") as file:
    my_list = pickle.load(file)
print(my_list)

class Student:
    """Plain record holding a student's name and age."""

    def __init__(self, name, age):
        self.name, self.age = name, age

# Pickle an instance of a custom class and read it back
stu = Student("三三",20)
# The with-block closes the file even if dump() raises
with open("student.txt", "wb") as file:
    pickle.dump(stu, file)

# Reading inside a with-block ensures the handle is closed after loading
with open("student.txt", "rb") as file:
    m = pickle.load(file)
print(m.name,m.age)

可迭代对象

DeprecationWarning: Using or importing the ABCs from ‘collections’ instead of from ‘collections.abc’ is deprecated, and in 3.8 it will stop working
from collections import Iterable
这表示我们应该改用：
from collections.abc import Iterable

# Iterable: any object whose values can be traversed with a for loop
# Usable with for: list, tuple, dict, str, set, range
from collections.abc import Iterable
result = isinstance([123,45,5],Iterable)
print(result)

# An iterable has an __iter__ method; dir() lists it among the attributes
result = dir([1,23])
print(result)
# Numbers are not iterable
# isinstance can be used like this to check whether a value is of the type you need

迭代器

# Iterator: an object created from a class that defines both __iter__ and
# __next__. Its job is to hand out the next item based on the current position.
class MyIterater(object):
    """One-shot iterator over a list of values.

    Args:
        my_list: Optional iterable of values to walk over. When omitted,
            the demo data ``[4, 5, 19]`` is used.
    """

    def __init__(self, my_list=None):
        # Copy the input so later changes by the caller do not affect iteration.
        self.my_list = [4, 5, 19] if my_list is None else list(my_list)
        self.current_index = 0

    def __iter__(self):
        """Return the iterator object itself."""
        return self

    def __next__(self):
        """Return the next value.

        Raises:
            StopIteration: When every value has already been returned.
        """
        if self.current_index >= len(self.my_list):
            # Signal the end of iteration to the for loop / next()
            raise StopIteration
        result = self.my_list[self.current_index]
        self.current_index += 1
        return result

# The for loop calls __iter__ once, then __next__ repeatedly until StopIteration
my_list = MyIterater()
for value in my_list:
    print(value)

生成器

# Iterable: any object whose values can be traversed with a for loop
# Usable with for: list, tuple, dict, str, set, range
from collections.abc import Iterable
result = isinstance([123,45,5],Iterable)
print(result)

# An iterable has an __iter__ method
# Values can only be consumed going forward, never backward
result = dir([1,23])
print(result)
# Numbers are not iterable
# isinstance can be used like this to check whether a value is of the type you need

线程

#线程：就是执行代码的分支，默认程序是只有一个线程
import time
import threading

def AA(count):
    """Print ``---a---`` ``count`` times, sleeping 0.2 seconds after each print."""
    for _ in range(count):
        print("---a---")
        time.sleep(0.2)

def BB(count):
    """Print ``---b---`` ``count`` times, sleeping 0.2 seconds after each print."""
    for _ in range(count):
        print("---b---")
        time.sleep(0.2)

if __name__ == "__main__":
    # Only run when this file is the main module
    # target: the function the thread will run
    # args: pass arguments to the function as a tuple
    # kwargs: pass arguments to the function as a dict
    #sub_thread = threading.Thread(target = AA,args = (3,))
    sub_thread = threading.Thread(target=AA,kwargs={"count":5})
    three_thread = threading.Thread(target=BB, args=(7,))
    # Start the threads; a thread only runs once start() is called
    sub_thread.start()
    three_thread.start()

    # # Run BB in the main thread instead
    # BB(3)
    # Threads run in no guaranteed order; it is decided by CPU scheduling