scrapy startproject ippool
cd ippool
scrapy genspider ip www.xxx.com
# -*- coding: utf-8 -*-
import scrapy
from ippool.items import IppoolItem
class IpSpider(scrapy.Spider):
    """Spider that reads rows of the ``ip_list`` table on the start page.

    Every data row is converted into an ``IppoolItem`` whose ``ip``, ``port``
    and ``type`` fields are taken from the 2nd, 3rd and 6th cell of the row.
    """

    name = 'ip'
    # Scrapy expects bare domain names here; entries written as URLs are
    # ignored with a warning, so the scheme and path must not be included.
    allowed_domains = ['xicidaili.com']
    start_urls = ['https://www.xicidaili.com/nn/']

    def parse(self, response):
        """Yield one ``IppoolItem`` per complete row of the ``ip_list`` table.

        Args:
            response: The downloaded page to extract rows from.

        Yields:
            IppoolItem: Item with ``ip``, ``port`` and ``type`` set to the
            text of the 2nd, 3rd and 6th ``<td>`` of the row.
        """
        rows = response.xpath('//*[@id="ip_list"]//tr')
        # The first <tr> is skipped (presumably the table header row).
        for row in rows[1:]:
            ip = row.xpath('td[2]/text()').extract_first()
            port = row.xpath('td[3]/text()').extract_first()
            kind = row.xpath('td[6]/text()').extract_first()
            if ip is None or port is None or kind is None:
                # A row lacking any of the cells would previously raise
                # IndexError and abort the remaining rows; skip it instead.
                continue
            item = IppoolItem()
            item['ip'] = ip
            item['port'] = port
            item['type'] = kind
            yield item