一个 wxPython + 网页采集的 Demo

# -*-coding:utf-8 -*-
# 勿喷,很早之前写着玩的

from __future__ import division
import wxversion,sys,os
wxversion.select('3.0-msw-unicode')
import wx,urllib,urllib2, base64,re,sys,os,time
# Python 2 only: reload(sys) must run before sys.setdefaultencoding() can be
# called here; together they make UTF-8 the default codec for implicit
# str <-> unicode conversions. Keep these two statements in this order.
reload(sys)
sys.setdefaultencoding('UTF-8')
# NOTE(review): `encoding` is not read anywhere in the visible code.
encoding = "UTF-8"

# NOTE(review): this module-level `percent` is not read anywhere in the
# visible code; ButtonFrame.urlcallback assigns its own local `percent`.
percent =0
#http://www.61ertong.com/plus/list.php?tid=10&TotalResult=1199	

class ButtonFrame(wx.Frame):
    """Main window of the 61ertong.com media downloader.

    The user pastes a page URL, presses the download button, and the frame
    fetches the page, extracts the base64-encoded media URL (SWF first, FLV
    as a fallback), and saves the media under ``DOWNLOAD_DIR`` using the page
    title as the file name. A gauge and a percentage label show progress.

    NOTE: the download runs synchronously inside the button handler, so the
    window does not process other events until the transfer finishes.
    """

    # Directory that downloaded media is written to.
    DOWNLOAD_DIR = "D:/py/videos"
    # The site refuses requests that do not carry a browser-like User-Agent.
    USER_AGENT = 'Mozilla/5.0 (X11; U; Linux i686)Gecko/20071127 Firefox/2.0.0.11'
    # Value the page carries in "swfurl" when it has no SWF (media is FLV then).
    EMPTY_MEDIA_URL = 'http://img.61gequ.com'

    # Compiled once at class creation instead of on every button click.
    SWF_RE = re.compile(r'swfurl\:"(.*?)"', re.S | re.U | re.I)
    FLV_RE = re.compile(r'flvurl\:"(.*?)"', re.S | re.U)
    TITLE_RE = re.compile(r'<h1><a href="(.*)">(.*)</a><\/h1>', re.S | re.U)
    # Characters that are not allowed in Windows file names.
    INVALID_NAME_CHARS_RE = re.compile(r'[\\/:*?"<>|\r\n\t]')

    def __init__(self):
        """Build the fixed-size frame, its widgets, and the event bindings."""
        # Default frame style minus the resize border and maximize box.
        frame_style = ((wx.DEFAULT_FRAME_STYLE ^ (wx.RESIZE_BORDER | wx.MAXIMIZE_BOX))
                       | wx.CLIP_CHILDREN | wx.NO_FULL_REPAINT_ON_RESIZE)
        wx.Frame.__init__(self, None, -1, 'Button Example', size=(600, 200),
                          style=frame_style)
        panel = wx.Panel(self, -1)
        self.count = 0

        self.btn_down = wx.Button(panel, -1, u"下载", pos=(200, 115))
        self.btn_exit = wx.Button(panel, -1, u"退出", pos=(320, 115))
        self.url_text = wx.TextCtrl(
            panel, -1,
            'http://www.61ertong.com/flash/gequ/cngequ/show/f_60545.html',
            size=(450, -1), pos=(110, 45))

        self.processBar = wx.Gauge(panel, -1, 100, pos=(110, 80), size=(420, 15),
                                   style=wx.GA_PROGRESSBAR)
        self.processBar.SetBezelFace(3)
        self.processBar.SetShadowWidth(3)

        self.Label_info = wx.StaticText(panel, -1, u"61儿童网地址:", pos=(20, 50))
        self.Label_proce = wx.StaticText(panel, -1, u"进度:", pos=(65, 80))
        self.Label_Url = wx.StaticText(panel, -1, u"", pos=(20, 20))
        self.Label_Percent = wx.StaticText(panel, -1, u"0", pos=(530, 80))
        self.Label_Flag = wx.StaticText(panel, -1, u"%", pos=(550, 80))

        self.Bind(wx.EVT_BUTTON, self.OnClickButton, self.btn_down)
        # The exit button asks for confirmation first.
        self.Bind(wx.EVT_BUTTON, self.OnClose, self.btn_exit)
        # Close events originate from the frame itself, so no source widget is
        # passed here (binding with a button as source never matched).
        self.Bind(wx.EVT_CLOSE, self.OnCloseWindow)

    def OnClose(self, event):
        """Ask for confirmation and destroy the frame if the user agrees."""
        answer = wx.MessageBox(u'您确定要退出本程序吗', u'WxPython Info',
                               wx.OK | wx.CANCEL)
        if answer == wx.OK:
            self.Destroy()

    def _report_progress(self, blocks, block_size, total_size):
        """Update the gauge and percentage label from urlretrieve's reporthook.

        ``total_size`` is not positive when the server did not announce a
        size; nothing is updated then. The value is clamped to 0..100 because
        the last block usually overshoots the real file size.
        """
        if total_size <= 0:
            return
        percent = int(100.0 * blocks * block_size / total_size)
        percent = max(0, min(100, percent))
        self.processBar.SetValue(percent)
        self.Label_Percent.SetLabel(str(percent))
        # The download blocks the event loop, so repaint the two widgets now.
        self.processBar.Update()
        self.Label_Percent.Update()

    @staticmethod
    def urlcallback(a, b, c):
        """Legacy reporthook kept for backward compatibility.

        Forwards to the frame stored in the module-level name ``obj``;
        OnClickButton no longer uses it.
        """
        obj._report_progress(a, b, c)

    def OnCloseWindow(self, event):
        """Handle the frame's close event by destroying the window."""
        # Destroy() rather than Close(): Close() would raise EVT_CLOSE again.
        self.Destroy()

    def _fetch_page(self, url):
        """Return the raw body of ``url``; network errors propagate."""
        request = urllib2.Request(url=url, headers={'User-Agent': self.USER_AGENT})
        response = urllib2.urlopen(request)
        try:
            return response.read()
        finally:
            response.close()

    def _find_media(self, html):
        """Return ``(media_url, extension)`` found in ``html``, or None.

        The SWF entry is tried first; if it is missing, undecodable, or equal
        to the empty placeholder, the FLV entry is used instead.
        """
        import binascii  # local import: only needed to name the decode error

        for pattern, extension in ((self.SWF_RE, '.swf'), (self.FLV_RE, '.flv')):
            match = pattern.search(html)
            if match is None:
                continue
            try:
                media_url = base64.decodestring(match.group(1))
            except (binascii.Error, TypeError, ValueError):
                continue
            if media_url and media_url != self.EMPTY_MEDIA_URL:
                return media_url, extension
        return None

    def _remote_size(self, media_url):
        """Return the Content-Length reported for ``media_url`` by a HEAD request.

        Returns 0 when the header is absent; network errors and a
        non-numeric header propagate to the caller.
        """
        request = urllib2.Request(media_url)
        # urllib2 has no HEAD helper; overriding get_method switches the verb.
        request.get_method = lambda: 'HEAD'
        response = urllib2.build_opener().open(request)
        try:
            return int(dict(response.headers).get('content-length', 0))
        finally:
            response.close()

    def _file_stem(self, html, media_url):
        """Return a file-system-safe base name for the download.

        Uses the page title when present, otherwise the last path segment of
        ``media_url``; falls back to ``video`` if nothing usable remains.
        """
        match = self.TITLE_RE.search(html)
        if match is not None:
            raw_name = match.group(2)
        else:
            raw_name = os.path.splitext(media_url.rsplit('/', 1)[-1])[0]
        if isinstance(raw_name, bytes):
            # 'replace' keeps a page in another encoding from aborting the download.
            raw_name = raw_name.decode('utf-8', 'replace')
        cleaned = self.INVALID_NAME_CHARS_RE.sub(u'_', raw_name).strip(u' .')
        return cleaned or u'video'

    def OnClickButton(self, event):
        """Download the media referenced by the page URL in the text box.

        Every failure (empty URL, unreachable page, no media entry, failed
        size probe, directory creation failure, short download) is reported
        through a message box and ends the handler.
        """
        url = self.url_text.GetValue().strip()
        self.processBar.SetValue(1)
        if not url:
            wx.MessageBox(u'请输入下载地址', u'WxPython Warning',
                          wx.OK | wx.CANCEL | wx.ICON_ERROR)
            return

        # Broad catch on purpose: this is the UI boundary and any network
        # failure should end in the same message.
        try:
            html_content = self._fetch_page(url)
        except Exception as ex:
            print(ex)
            wx.MessageBox(u"没有抓取到远程文件", u'WxPython Warning',
                          wx.OK | wx.CANCEL | wx.ICON_ERROR)
            return

        # Debug dump of the fetched page; a failure here must not stop the download.
        try:
            with open('thefile.txt', 'w') as dump:
                dump.write(html_content)
        except IOError as ex:
            print(ex)

        media = self._find_media(html_content)
        if media is None:
            wx.MessageBox(u"没有抓取到远程文件", u'WxPython Info', wx.OK | wx.ICON_ERROR)
            return
        swfurl, file_ext = media

        try:
            swflen = self._remote_size(swfurl)
        except Exception as e:
            wx.MessageBox(u'发生错误 %s' % e, u'WxPython Info',
                          wx.OK | wx.CANCEL | wx.ICON_ERROR)
            return

        size_mb = round(swflen / 1024.0 / 1024.0, 2)
        self.Label_Url.SetLabel(swfurl + "  " + str(size_mb) + "M")

        # A reported length of 10 bytes or less means the media is not really there.
        if swflen <= 10:
            wx.MessageBox(u"没有抓取到远程文件", u'WxPython Info', wx.OK | wx.ICON_ERROR)
            return

        if not os.path.isdir(self.DOWNLOAD_DIR):
            try:
                # makedirs also creates missing parent directories.
                os.makedirs(self.DOWNLOAD_DIR)
            except OSError:
                wx.MessageBox(u'建立文件夹失败 ', u'WxPython Info', wx.OK | wx.ICON_ERROR)
                return

        target = self.DOWNLOAD_DIR + "/" + self._file_stem(html_content, swfurl) + file_ext
        try:
            urllib.urlretrieve(swfurl, target, self._report_progress)
        except IOError as ex:
            print(ex)
            wx.MessageBox(u'好像下载失败了,视频长度过段', u'WxPython Info',
                          wx.OK | wx.ICON_ERROR)
            return

        if os.path.getsize(target) != swflen:
            wx.MessageBox(u'好像下载失败了,视频长度过段', u'WxPython Info',
                          wx.OK | wx.ICON_ERROR)
	
if __name__ == '__main__':
    # wx.App(False) replaces the deprecated wx.PySimpleApp(); False keeps
    # stdout/stderr on the console instead of redirecting them to a window.
    app = wx.App(False)
    frame = ButtonFrame()
    # ButtonFrame.urlcallback looks the frame up through this module-level name.
    obj = frame
    frame.SetTitle(u"61儿童网下载器")
    frame.Center()
    frame.Show()
    # The maximize box is already removed by the style passed in
    # ButtonFrame.__init__; assigning frame.MAXIMIZE_BOX had no effect.
    app.MainLoop()

猜你喜欢

转载自blog.csdn.net/swg0110/article/details/46823269