进程、requests模块的简单使用

一、进程

  1.进程间数据不共享

import multiprocessing
# Module-level list that task() appends to. The sample output listed after
# this snippet shows every child printing a one-element list, i.e. an append
# made in one process is not visible in the others.
data_list = list()


def task(arg):
    """Append ``arg`` to the module-level ``data_list`` and print the list."""
    data_list.extend((arg,))
    print(data_list)
def run():
    """Start ten child processes, each calling ``task`` with its index 0-9."""
    for index in range(10):
        child = multiprocessing.Process(target=task, args=(index,))
        child.start()


if __name__ == '__main__':
    run()
# [0]        #分别打印了一个列表
# [1]
# [2]
# [3]
# [4]
# [5]
# [6]
# [7]
# [8]
# [9]
View Code

  常用功能:

    join、daemon、name、multiprocessing.current_process()、multiprocessing.current_process().ident/pid

import time
import multiprocessing
def task(arg):
    """Sleep for two seconds, then print ``arg``."""
    delay_seconds = 2
    time.sleep(delay_seconds)
    print(arg)
def run():
    """Launch two named child processes running ``task`` and print markers.

    A marker is printed before the first start and after each ``start()``
    call; the parent does not wait for the children inside this function.
    """
    print(1111111)
    first = multiprocessing.Process(target=task, args=(1,), name="pp1")
    first.start()
    print(2222222)

    second = multiprocessing.Process(target=task, args=(2,), name="pp2")
    second.start()
    print(33333333)


if __name__ == "__main__":
    run()
常用功能示例

  通过继承方式创建进程

class MyProcess(multiprocessing.Process):
    """Process subclass whose work is defined by overriding ``run``."""

    def run(self):
        """Print the label followed by the process object executing this method.

        When the instance is launched with ``start()``, ``run`` is invoked in
        the child process, so the printed object identifies that child.
        """
        # Report the running process instance. Printing the
        # ``multiprocessing.Process`` class itself would show the same class
        # object for every child and carry no per-process information.
        print("当前进度",multiprocessing.current_process())
def run():
    """Create and start two ``MyProcess`` instances, one after the other."""
    for _ in range(2):
        worker = MyProcess()
        worker.start()


if __name__ == '__main__':
    run()
View Code

  2.进程间数据共享

Queue

  linux:

# Queue created at import time; run() passes it to every child process and
# reads the children's results back from it.
q = multiprocessing.Queue()
def task(arg, q):
    """Put ``arg`` on the queue ``q``."""
    q.put(arg)
def run():
    """Start ten producer processes and print the ten values they enqueue.

    Each child runs ``task(i, q)`` for ``i`` in 1..10. The parent prints the
    values in the order they arrive on ``q`` (not necessarily 1..10), then
    waits for every child to exit before returning.
    """
    workers = []
    for i in range(1, 11):
        p = multiprocessing.Process(target=task, args=(i, q))
        p.start()
        workers.append(p)

    # Read exactly one value per child. An unbounded ``while 1`` loop would
    # block forever on ``q.get()`` once all results have been consumed, so
    # the program would never terminate.
    for _ in workers:
        print(q.get())

    # Reap the children so none is left behind when run() returns.
    for p in workers:
        p.join()


if __name__ == '__main__':
    run()
View Code

  windows:

def task(arg, q):
    """Put ``arg`` on the queue ``q``."""
    q.put(arg)
if __name__ == '__main__':
    # The queue is created under the main guard and handed to every child
    # explicitly through ``args``.
    q = multiprocessing.Queue()
    workers = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, q))
        p.start()
        workers.append(p)

    # Read exactly one value per child. An unbounded ``while 1`` loop would
    # block forever on ``q.get()`` after the tenth result and the script
    # would never exit.
    for _ in workers:
        print(q.get())

    # Wait for every child to finish before the script ends.
    for p in workers:
        p.join()
View Code

Manager

  linux:

# Manager-backed dict created at import time; task() writes to it through
# this module-level name instead of receiving it as an argument.
# NOTE(review): the source labels this variant "linux" - presumably it depends
# on the fork start method so that children inherit ``dic``; confirm.
m = multiprocessing.Manager()
dic = m.dict()
def task(arg):
    """Store 100 in the module-level ``dic`` under the key ``arg``."""
    dic[arg] = 100
def run():
    """Start ten children one at a time, pausing for Enter after each start.

    After every ``start()`` the parent blocks on ``input`` and then prints
    the current values of the module-level ``dic``.
    """
    for key in range(10):
        child = multiprocessing.Process(target=task, args=(key,))
        child.start()
        input(">>>>>>")
        current_values = dic.values()
        print(current_values)


if __name__ == '__main__':
    run()
View Code

  windows:

def task(arg, dic):
    """Store 100 in ``dic`` under the key ``arg``."""
    dic[arg] = 100
def run():
    """Fill a Manager dict from ten child processes and print the result.

    The Manager and its dict are created inside this function and the dict
    proxy is passed to each child through ``args``. The dict is printed only
    after every child has exited.
    """
    m = multiprocessing.Manager()
    dic = m.dict()
    lis = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic))
        p.start()
        lis.append(p)

    # join() blocks until each child has exited. This replaces a polling loop
    # over is_alive() that kept a CPU core busy while waiting.
    for p in lis:
        p.join()
    print(dic)


if __name__ == '__main__':
    run()
View Code

  3.进程锁:

    进程锁的种类和用法与线程锁一致

  4.进程池:

import time
from concurrent.futures import ProcessPoolExecutor
def task(arg):
    """Sleep for two seconds, then print ``arg``."""
    delay_seconds = 2
    time.sleep(delay_seconds)
    print(arg)
if __name__ == '__main__':
    # Run ten tasks on a pool of five worker processes. Using the executor as
    # a context manager shuts it down on exit from the block, after every
    # submitted task has finished, instead of leaving it open until the
    # interpreter exits.
    with ProcessPoolExecutor(5) as pool:
        for i in range(10):
            pool.submit(task, i)
View Code

  requests模块的简单爬虫

  安装:(cmd)

    pip3 install requests

    pip3 install beautifulsoup4

import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
def task(url):
    """Fetch one listing page and print the title and link of each item.

    The page is requested with a browser-like User-Agent header, parsed with
    BeautifulSoup, and every ``div.item`` inside ``div#content-list`` is
    reported as ``title href`` on its own line.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        None. Pages without a ``content-list`` container and items without
        an ``<a>`` tag are skipped silently.

    Raises:
        Whatever ``requests.get`` raises, including a timeout error when the
        server does not respond within ten seconds.
    """
    r1 = requests.get(
        url=url,
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36'
        },
        # Without a timeout a stalled server would block this worker forever.
        timeout=10,
    )
    soup = BeautifulSoup(r1.text, "html.parser")
    content_list = soup.find('div', attrs={'id': 'content-list'})
    if content_list is None:
        # find() returns None when the container is missing (layout change,
        # error page, blocked request); there is nothing to list then.
        return
    for item in content_list.find_all('div', attrs={'class': 'item'}):
        # Look the link up once and reuse it for both the title and the href.
        link = item.find('a')
        if link is None:
            continue
        title = link.text.strip()
        target_url = link.get('href')
        print(title, target_url)
def run():
    """Submit listing pages 1 through 49 to a five-worker thread pool."""
    url_template = 'https://dig.chouti.com/all/hot/recent/%s'
    executor = ThreadPoolExecutor(5)
    for page in range(1, 50):
        executor.submit(task, url_template % page)


if __name__ == '__main__':
    run()
View Code

  以上示例属于 I/O 密集型任务(大部分时间在等待网络响应),因此更适合使用多线程

  requests模块模拟浏览器发送请求

    requests.get():

  线程和线程池

  

猜你喜欢

转载自www.cnblogs.com/qq849784670/p/9635973.html