Python爬虫-第一次尝试

跟着教学视频一步步来,敲出了如下的代码:

import requests
from lxml import etree
# pip install lxml==4.1.0
# from pyquery import PyQuery

import re

# Page to scrape.
url = "https://www.xicidaili.com/"

# Request headers telling the server the request comes from a browser --
# the first step in getting past basic anti-scraping checks.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/71.0.3573.0 Safari/537.36"
    ),
    # "br" is deliberately not advertised: requests only decodes Brotli when
    # an optional brotli package is installed, and an undecoded body would
    # make response.text unusable for parsing.
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
}

# Send both http and https traffic through the same proxy.
proxies = {
    "http": "119.101.113.16:9999",
    "https": "119.101.113.16:9999",
}

# Fetch the page. `response` stays None when the request itself fails.
try:
    # Without a timeout requests waits indefinitely, so an unresponsive
    # proxy would hang the script; give up after 10 seconds instead.
    response = requests.get(
        url=url,
        headers=headers,
        proxies=proxies,
        timeout=10,
    )
except requests.RequestException as e:
    # Connection errors, proxy errors, timeouts, etc.
    print(e)
    response = None
else:
    print(response.status_code)

# Ways to extract data from the page: regex, XPath, CSS selectors.
# XPath is used here. Parsing only happens for a successful (non-4xx/5xx)
# response, which is what the truthiness of a Response object means.
if response is not None and response.ok:
    # Parse the HTML into an element tree that can be queried with XPath.
    res_xpath = etree.HTML(response.text)
    # [1:] skips the first row of the table. xpath() returns a list of the
    # matching text nodes, so each field is printed as a list; rows without
    # matching <td> cells therefore print as []:[].
    for node in res_xpath.xpath('//table[@id="ip_list"]//tr')[1:]:
        print(node.xpath('./td[2]/text()'), end=":")
        print(node.xpath('./td[3]/text()'))


运行后获取到的数据如下(其中的 []:[] 来自表格里没有 IP、端口单元格的行,应该是各分组的标题行和表头行):

C:\ProgramData\Anaconda3\python.exe E:/PythonProjects/chapter1Pythonbase/pytest.py
200
[]:[]
['119.101.115.95']:['9999']
['119.101.118.81']:['9999']
['121.61.32.67']:['9999']
['119.101.115.125']:['9999']
['119.101.114.113']:['9999']
['119.101.115.162']:['9999']
['119.101.117.228']:['9999']
['119.101.114.194']:['9999']
['119.101.113.95']:['9999']
['27.24.197.57']:['9999']
['115.155.122.148']:['8118']
['119.101.114.203']:['9999']
['111.181.70.143']:['9999']
['119.101.112.109']:['9999']
['122.4.41.192']:['9999']
['113.128.9.142']:['9999']
['119.101.118.179']:['9999']
['119.101.113.231']:['9999']
['119.101.115.5']:['9999']
['112.85.169.19']:['9999']
[]:[]
[]:[]
['1.192.240.88']:['9797']
['140.143.170.222']:['8118']
['125.123.143.171']:['9000']
['163.125.71.138']:['9999']
['163.125.68.57']:['8888']
['113.116.56.226']:['9000']
['101.132.122.230']:['3128']
['121.69.37.6']:['9797']
['119.176.80.220']:['9999']
['112.115.57.20']:['3128']
['182.61.170.45']:['3128']
['221.6.201.18']:['9999']
['112.117.204.176']:['9999']
['61.160.233.111']:['10000']
['113.235.187.147']:['9999']
['112.95.23.197']:['8888']
['61.160.233.110']:['10000']
['222.132.145.122']:['53281']
['125.46.0.62']:['53281']
['163.125.156.29']:['9999']
[]:[]
[]:[]
['119.101.115.95']:['9999']
['119.101.118.81']:['9999']
['119.101.117.228']:['9999']
['119.101.114.194']:['9999']
['119.101.113.95']:['9999']
['27.24.197.57']:['9999']
['115.155.122.148']:['8118']
['119.101.114.203']:['9999']
['119.101.112.109']:['9999']
['119.101.113.231']:['9999']
['112.85.169.19']:['9999']
['112.85.164.132']:['9999']
['125.123.142.173']:['9999']
['58.55.148.161']:['9999']
['119.101.116.161']:['9999']
['110.52.235.142']:['9999']
['223.215.186.198']:['9999']
['119.101.113.38']:['9999']
['119.101.117.215']:['9999']
['119.101.113.147']:['9999']
[]:[]
[]:[]
['121.61.32.67']:['9999']
['119.101.115.125']:['9999']
['119.101.114.113']:['9999']
['119.101.115.162']:['9999']
['111.181.70.143']:['9999']
['122.4.41.192']:['9999']
['113.128.9.142']:['9999']
['119.101.118.179']:['9999']
['119.101.115.5']:['9999']
['119.101.112.234']:['9999']
['119.101.118.172']:['9999']
['183.148.159.165']:['9999']
['112.85.164.178']:['9999']
['111.181.68.220']:['9999']
['183.16.230.145']:['8123']
['114.239.150.251']:['9999']
['114.106.151.103']:['9999']
['119.101.113.245']:['9999']
['171.80.136.199']:['9999']
['119.101.117.161']:['9999']
[]:[]
[]:[]
['121.31.150.165']:['6666']
['121.31.193.132']:['6675']
['182.33.217.116']:['6666']
['117.69.98.216']:['6666']
['121.31.103.33']:['6666']
['113.121.245.231']:['6675']
['113.121.245.32']:['6667']
['218.73.130.59']:['6675']
['110.73.33.207']:['6673']
['110.73.30.246']:['6666']
['112.114.78.54']:['6673']
['171.38.64.67']:['6675']
['112.114.76.176']:['6668']
['222.172.239.69']:['6666']
['114.239.253.38']:['6666']
['116.28.106.165']:['6666']
['220.179.214.77']:['6666']
['110.73.32.7']:['6666']
['114.139.48.8']:['6668']
['111.124.231.101']:['6668']

Process finished with exit code 0

猜你喜欢

转载自blog.csdn.net/ykallan/article/details/86314681