目标网站
找到请求地址,发现没有加密,因此直接请求,再用 XPath 提取数据。
完整代码:
import requests
from lxml import etree
# Fetch one goods-detail page and print the fields pulled out of it with
# absolute XPath queries. Every `xpath()` call returns a list, so each value
# printed below is a list (empty when the path matches nothing).
url = 'http://www.mypricemix.com/wap/goods_detail/fc73a3511f8911e98985525400a3d313.html'

# Without a timeout requests waits indefinitely on a stalled connection.
resp = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page into empty lists.
resp.raise_for_status()
eroot = etree.HTML(resp.text)

goods_name = eroot.xpath('/html/body/header/h1/text()')
print('商品名称:',goods_name)

# One entry is printed per picture container.
pictures_ele = eroot.xpath('/html/body/div[2]/div[2]/div/div')
for picture_ele in pictures_ele:
    pic_src = picture_ele.xpath('./a/img/@src')
    print('图片url:',pic_src)

# NOTE(review): the two paths below contain `tbody`, which browsers often
# insert while rendering; confirm the raw HTML really has it, otherwise these
# queries match nothing.
goods_num = eroot.xpath('/html/body/div[3]/table/tbody/tr[1]/td[1]/span[2]/text()')
print('货号',goods_num)
detail = eroot.xpath('/html/body/div[3]/table/tbody/tr[2]/td/span/text()')
print('细节特征',detail)

print('以下折合人民币价格均按照今日最新汇率计算')
update_time = eroot.xpath('/html/body/div[4]/span[2]/text()')
print(update_time)

# Each child <div> of the fourth body <div> is treated as one price entry.
prices_ele = eroot.xpath('/html/body/div[4]/div')
for price_ele in prices_ele:
    # Named separately from `url` so the page address above is not overwritten.
    official_links = price_ele.xpath('./ul/a/@href')
    if not official_links:
        # Europe is laid out differently: there is no <a> wrapper, the links
        # sit in each <li>'s data-url attribute, one <li> per country.
        official_links = price_ele.xpath('./ul/li/@data-url')
        countries_ele = price_ele.xpath('./ul/li')
        for country_ele in countries_ele:
            country_name = country_ele.xpath('./text()')
            print('国家:',country_name)
    else:
        country = price_ele.xpath('./ul/a/li/text()')
        print('国家:',country)
    print('官网链接:',official_links)

    local_price = price_ele.xpath('./div[1]/span/text()')
    print('官方售价:',local_price)
    rmb_price = price_ele.xpath('./div[2]/span/text()')
    print('折合人民币:', rmb_price)
    # Visual separator between price entries.
    print('*'*100)