小说

import os
import requests#self
#from pyquery import PyQuery as pq
from lxml import etree
from fake_useragent import UserAgent
from time import sleep
# Shared HTTP configuration used by the download helpers below.
ua = UserAgent()
# Pick one Internet Explorer User-Agent string and reuse it for every request.
headers = {'User-Agent': ua.ie}
# Log the User-Agent that is actually stored in ``headers``.  Reading ``ua.ie``
# a second time may yield a different string (fake_useragent appears to choose
# one at random on each access), which would make the log line misleading.
print(headers['User-Agent'])
print(headers)
# Index page of the novel; every chapter link is read from this page.
url = 'http://www.xbiquge.la/7/7552/'
def text():
    """Download the novel's index page and return it as decoded text.

    Reads the module-level ``url`` and ``headers``.

    Returns:
        str: The response body of ``url`` decoded as UTF-8.
    """
    # ``headers`` must be passed by keyword: the second positional parameter
    # of ``requests.get`` is ``params``, so a positional call would append the
    # User-Agent to the query string instead of sending it as an HTTP header.
    response = requests.get(url, headers=headers)
    return response.content.decode('utf-8')
# Fetch the index page once as soon as the module runs; the returned HTML is
# discarded here, so this only exercises the request.
text()
# Debug aid (disabled): print(text())
def xpath():
    """Download the chapters listed on the index page, one text file each.

    The index page is fetched through ``text()`` and parsed once.  For every
    position 1..501 in the chapter list, the chapter link and title at that
    position are looked up, the chapter page is downloaded, and the title
    followed by the text nodes of ``<div id="content">`` is written, UTF-8
    encoded, to ``F://nei/xiaoshu/<position>.txt``.  Positions with no
    matching link or title are skipped.

    Returns:
        None. The results are the files written to disk and the progress
        lines printed to stdout.
    """
    # The index page is the same for every chapter, so download and parse it
    # a single time instead of once per loop iteration.
    index_tree = etree.HTML(text())

    # XPath positions are 1-based: ``dd[0]`` never matches anything, so the
    # loop starts at 1 and ends at the same last position (501) as before.
    for chapter_no in range(1, 502):
        sleep(3)  # pause between chapters to avoid hammering the site
        link_query = '//div[@id="list"]/dl/dd[%s]/a/@href' % chapter_no
        title_query = '//div[@id="list"]/dl/dd[%s]/a/text()' % chapter_no
        chapter_links = index_tree.xpath(link_query)
        chapter_titles = index_tree.xpath(title_query)
        path = os.path.abspath('F://nei/xiaoshu/%s.txt' % chapter_no)

        for link in chapter_links:
            chapter_url = 'http://www.xbiquge.la' + link
            print(chapter_url)
            for title in chapter_titles:
                print('下载' + title)
                # Send the same User-Agent header as the index request.
                chapter_response = requests.get(chapter_url, headers=headers)
                chapter_html = chapter_response.content.decode('utf-8')
                chapter_tree = etree.HTML(chapter_html)
                paragraphs = chapter_tree.xpath('//div[@id="content"]/text()')

                # Create the target folder on first use so ``open`` does not
                # fail just because the directory is missing.
                os.makedirs(os.path.dirname(path), exist_ok=True)
                # ``with`` guarantees the file is closed even if a write fails.
                with open(path, 'wb+') as chapter_file:
                    print(path)
                    chapter_file.write((title + '\n').encode('utf-8'))
                    for paragraph in paragraphs:
                        chapter_file.write(('\n' + paragraph + '\n').encode('utf-8'))

# Script entry point: start downloading all chapters when the file is run.
xpath()

猜你喜欢

转载自blog.51cto.com/14232326/2400163
今日推荐