Python Web Scraping - Basic Usage of urllib

from urllib import request, parse, error
from http import cookiejar



if __name__ == '__main__':
    # Basic GET request
    #response = request.urlopen("http://www.baidu.com")
    #print(response.read().decode("utf-8"))

    # Sending a POST request: passing data makes it a POST; without data it is a GET
    #data = bytes(parse.urlencode({"word": "hello"}), encoding="utf-8")
    #response = request.urlopen("http://httpbin.org/post", data=data)
    #print(response.read())

    # Timeout: urlopen raises an error if the server does not respond within the limit
    #response = request.urlopen("http://www.baidu.com", timeout=0.01)
    #print(response.read().decode("utf-8"))
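
    # A minimal sketch (not in the original post) of catching the timeout above:
    # urllib usually surfaces a connect timeout as a URLError whose reason is a socket.timeout.
    """
    import socket
    try:
        response = request.urlopen("http://www.baidu.com", timeout=0.01)
    except error.URLError as e:
        if isinstance(e.reason, socket.timeout):
            print("request timed out")
    """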

    # Response handling
    #response = request.urlopen("http://www.python.org")
    #print(type(response))
    # Status code
    #print(response.status)
    # Response headers
    #print(response.getheaders())
    #print(response.getheader("Server"))


    # More complex requests: wrap the URL in a Request object
    #req = request.Request("http://python.org")
    #response = request.urlopen(req)
    #print(response.read().decode("utf-8"))

    # Request headers
    # Pass a headers dict to Request; req.add_header() also works (see the sketch after the block below)
    """
    url = "http://httpbin.org/post"
    headers = {
        "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Host":"httpbin.org"
    }
    dict = {
        "name":"Germey"
    }
    data = bytes(parse.urlencode(dict),encoding="utf8")
    req = request.Request(url,data,headers,method="POST")
    response = request.urlopen(req);
    print(response.read())
    """

    # Proxies
    """
    proxy_handler = request.ProxyHandler({
        # proxy mapping goes here, e.g. "http": "http://127.0.0.1:9743" (placeholder address)
    })
    opener = request.build_opener(proxy_handler)
    response = opener.open("http://httpbin.org/get")

    # Cookies (to keep a logged-in session)
    cookie = cookiejar.CookieJar()
    handler = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(handler)
    response = opener.open("http://www.baidu.com")
    """

    # Saving cookies to disk: MozillaCookieJar, LWPCookieJar
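
    # A minimal sketch (not in the original post) of persisting cookies with MozillaCookieJar;
    # the file name "cookies.txt" is arbitrary, and LWPCookieJar is used the same way.
    """
    cookie = cookiejar.MozillaCookieJar("cookies.txt")
    handler = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(handler)
    response = opener.open("http://www.baidu.com")
    cookie.save(ignore_discard=True, ignore_expires=True)

    # Loading them back later:
    cookie = cookiejar.MozillaCookieJar()
    cookie.load("cookies.txt", ignore_discard=True, ignore_expires=True)
    """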

    # Catching exceptions: mostly HTTPError or URLError (a sketch covering both follows the block below)
    """
    try:
        response = request.urlopen("http://amojury.github.io")
    except error.URLError as e:
        print(e.reason)
    """

    # URL parsing helpers: urlparse, urlunparse (the inverse), urlencode (dict -> query string); a sketch of the last two follows the example below
    #result = parse.urlparse("https://www.baidu.com/s?ie=utf-8&f=3&rsv_bp=0&rsv_idx=1&tn=baidu&wd=python%20%E6%89%B9%E9%87%8F%E6%B3%A8%E9%87%8A&rsv_pq=f9b1a8b300011700&rsv_t=1252nVpaBhdm%2FEdlsdrPgUxIHLfk4QNB443eSTUKoRcHFx9G09YZi9N9Dvo&rqlang=cn&rsv_enter=1&rsv_sug3=9&rsv_sug1=8&rsv_sug7=101&rsv_sug2=1&prefixsug=python%2520%25E6%2589%25B9%25E9%2587%258F&rsp=0&inputT=10498&rsv_sug4=14994")
    #print(result)
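
    # A minimal sketch of the other two helpers named above: urlunparse rebuilds a URL
    # from its six components, and urlencode turns a dict into a query string.
    """
    parts = ["https", "www.baidu.com", "/s", "", "wd=python", ""]
    print(parse.urlunparse(parts))   # https://www.baidu.com/s?wd=python

    params = {"wd": "python", "ie": "utf-8"}
    print(parse.urlencode(params))   # wd=python&ie=utf-8
    """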

Reposted from www.cnblogs.com/amojury/p/9123553.html