爬虫课程3

用爬虫从腾讯视频下载《毒品的危害》视频的代码

import requests

# Direct address of the MP4 file. The query string carries a `vkey` token;
# NOTE(review): this presumably expires, so the URL may need to be refreshed
# from the player page before running — confirm.
url='http://122.246.10.160/vhot2.qqvideo.tc.qq.com/AERa3MJ-4PsBlS8oP_ZsuxlwJzm6Uf_hTxhVT81JTZiQ/b0175dfvbs1.mp4?sdtfrom=v1010&guid=1e962c528e492cd4f3ba7d926c487ae0&vkey=F41F7658B38D1E31D183CC4B255496D7BD92F9E63CDB8BC3F23882BCCB2DB34DB184F5675B25A2AD613E3F0BA1D00A49BF5BA21B930D8DADECD73D91C62A27F29A521D4705C293D0BA7C5AA1B1AF4D2E377E3CA5BE594132F8FA7C630F87DA39BC2D0EB8DE9B7E80CABF51D82153054C42E383A6784B0FDF'
# Stream the response so the whole video is never held in memory at once;
# the timeout keeps a stalled connection from hanging the script forever.
with requests.get(url, stream=True, timeout=30) as res:
    # Fail loudly on 4xx/5xx instead of saving an error page as an .mp4.
    res.raise_for_status()
    with open("毒品的危害.mp4",'wb') as f:
        # Write the body to disk in 64 KiB pieces as it arrives.
        for chunk in res.iter_content(chunk_size=64 * 1024):
            f.write(chunk)

爬取2018年杭州某高中录取分数线的代码

#!/usr/bin/env python
# -*- coding:utf-8 -*-
# __author: __nash

import requests
from bs4 import BeautifulSoup


# Step 1: fetch the article page and pull the first link found inside the
# `.content p a` selector.
url="http://hz.zhongkao.com/e/20170630/5955d2689a6b6.shtml"
res=requests.get(url, timeout=10)
# Stop here on an HTTP error rather than parsing an error page.
res.raise_for_status()
# Force GBK decoding of the body instead of relying on requests' guess.
res.encoding='gbk'
soup=BeautifulSoup(res.text,'lxml')
text1=soup.select('.content p a')[0]
# Debug output: the matched <a> tag, its type, and its href.
print(text1)
print(type(text1))
print(text1['href'])
print(type(text1['href']))

# Step 2: follow that link and locate the image on the target page.
# NOTE(review): assumes the href is an absolute URL — confirm; a relative
# href would make requests.get() fail.
url2=text1['href']
res2=requests.get(url2, timeout=10)
res2.raise_for_status()
res2.encoding='gbk'
soup2=BeautifulSoup(res2.text,'lxml')
# The wanted picture is taken to be the fifth <img> on the page; this index
# depends on the page layout and will need adjusting if the page changes.
img=soup2.select('img')[4]

# Step 3: download the image bytes and save them.
url3=img['src']
img2=requests.get(url3, timeout=30)
img2.raise_for_status()
# `with` guarantees the file is closed even if the write raises.
with open('score.png','wb') as f:
    f.write(img2.content)

爬取2018年杭州各高中录取分数线页面地址的代码

import requests
from bs4 import BeautifulSoup


# Fetch the listing page and decode it as GBK.
url='http://hz.zhongkao.com/zkzx/hzzkfsx/'
res=requests.get(url, timeout=10)
# Stop here on an HTTP error rather than parsing an error page.
res.raise_for_status()
res.encoding='gbk'
soup=BeautifulSoup(res.text,'html.parser')

# Text of the first `.ft16` element; it is written as the first line of the
# output file below.
text1=soup.select('.ft16')[0].text
print(text1)

# All links under `.bk-colkey`; only the first 31 are kept.
# NOTE(review): 31 is presumably the number of school entries on the page —
# confirm against the live page.
text2=soup.select('.bk-colkey a')
text3=text2[0:31]
for i in text3:
    print(i['href'],i.text)

# Write "<href>\t<link text>" per line. The encoding is set explicitly
# because the text is Chinese: the platform default encoding may be unable
# to represent it and would raise UnicodeEncodeError.
with open('school.txt','w',encoding='utf-8') as f:
    f.write(text1+'\n')
    for i in text3:
        f.write(i['href']+'\t'+i.text+'\n')

xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

猜你喜欢

转载自www.cnblogs.com/jinpan/p/9583988.html