使用 Python 的 pyquery 简单爬取数据的 demo

#!/bin/env python
#_*_ coding: utf-8 _*_

from pyquery import PyQuery as pq
import time
import random


def get_appinfo_from_yyb(app_pack,storenum):
	"""Fetch a page with pyquery and return one tab-separated info line.

	Args:
		app_pack: Identifier written verbatim as the first output field
			(presumably the app's package name -- confirm against caller).
		storenum: Selects which page layout to parse.
			``1`` reads the name from ``.det-name-int``, the download count
			from ``.det-ins-num`` and the description from
			``.det-app-data-info``.
			``2`` reads the name from the ``<title>`` text, the description
			from ``.app-text .pslide``, and leaves the download count empty.

	Returns:
		The string ``app_pack<TAB>name<TAB>download_count<TAB>description``,
		or ``""`` when the extracted description is empty.

	Raises:
		ValueError: If ``storenum`` is neither 1 nor 2. Previously this case
			fell through to ``return text`` with ``text`` never assigned.
	"""
	# Reject unknown layouts before doing any network work.
	if storenum not in (1, 2):
		raise ValueError('unsupported storenum: %r (expected 1 or 2)' % (storenum,))

	# NOTE(review): the real URL expression was redacted in this listing;
	# the placeholder below must be replaced before the function can run.
	url=xxxxxxxxxxxxxx
	data = pq(url)

	if storenum==1:
		app_name = data('.det-name-int').text()
		# Remove the literal "下载" label from the counter text.
		app_down_cnt = data('.det-ins-num').text().replace(u'下载' ,'')
		app_desc = data('.det-app-data-info').text()
	else:
		# Drop the last 7 characters of the title text
		# (presumably a fixed site suffix -- TODO confirm).
		app_name = data('title').text()[:-7]
		app_down_cnt = ""
		# Flatten the description onto a single line.
		app_desc = data('.app-text .pslide').text().replace('\n','')

	# An empty description is reported to the caller as "no result".
	if not app_desc:
		return ""
	return '\t'.join([app_pack, app_name, app_down_cnt, app_desc])


if __name__ == '__main__':
	# Script entry point: read one identifier per line from the input file,
	# look each one up with get_appinfo_from_yyb, and append every non-empty
	# result line to the output file.
	storenum = 2
	file_num = "11999.csv"
	input_path= "1filename"+file_num
	output_path = "1result"+file_num
	# The input file is opened with the platform default encoding, as before;
	# the output is always written as UTF-8.
	with open(input_path) as fr, open(output_path, "w", encoding='utf-8') as fw:
		print(input_path,output_path)
		# num is the 1-based line number in the input file.
		for num, raw_line in enumerate(fr, 1):
			# strip() also removes a trailing '\r' from CRLF files, which
			# strip('\n') used to leave inside the name.
			app_pack_name = raw_line.strip()
			if not app_pack_name:
				# Blank line: nothing to look up, so skip the fetch and the delay.
				continue
			app_info = get_appinfo_from_yyb(app_pack_name,storenum)
			print(num, app_pack_name, app_info)
			if app_info:
				fw.write('%s\n' % app_info)
			# Random pause of 0.1 to 3.0 seconds between consecutive fetches.
			sleep_num = random.randint(100, 3000)/1000
			time.sleep(sleep_num)
""""""

发布了80 篇原创文章 · 获赞 27 · 访问量 6万+

猜你喜欢

转载自blog.csdn.net/abc50319/article/details/96474672