import pymysql
import requests
from bs4 import BeautifulSoup
import lxml  # noqa: F401 -- BeautifulSoup loads the 'lxml' parser by name string

# Browser-like UA so the site does not reject the scraper.
HEADERS = {"User-Agent": "Mozilla/5.0(compatible;MSIE 9.0;Windows NT 6.1;Trident / 5.0)"}

# Relative article paths collected from the listing page (filled by get_path()).
message_list = []


def get_content():
    """Download the recruitment listing page and return its HTML text."""
    url = "http://www.scetc.cn/reList"
    response = requests.get(url, headers=HEADERS)
    response.encoding = 'utf-8'
    return response.text


def get_path():
    """Parse the listing page and append every article href to message_list."""
    html = get_content()
    soup = BeautifulSoup(html, 'lxml')
    # NOTE: renamed from `list`, which shadowed the builtin.
    for anchor in soup.select('div[class="newsbox"] ul li a'):
        message_list.append(anchor['href'])


def add(name, site, time, place, major, remark):
    """Insert one employment record into the local MySQL `test.employment` table.

    Parameters are the six column values, bound through a parameterized
    query (safe against SQL injection).
    """
    con = pymysql.connect(host='localhost', user='root', password='123456', database='test')
    try:
        with con.cursor() as cursor:
            sql = ("insert into employment(name,site,time,place,major,remark)"
                   "values (%s,%s,%s,%s,%s,%s)")
            cursor.execute(sql, [name, site, time, place, major, remark])
        # BUG FIX: pymysql autocommit is off by default; the original never
        # committed, so every insert was discarded when the connection closed.
        con.commit()
    finally:
        # Ensure the connection is released even if execute() raises.
        con.close()
    print("数据存储成功!")


def data_store():
    """Visit every collected article page, extract its table cells, and print
    them (persisting via add() is left disabled, as in the original)."""
    get_path()
    for path in message_list:
        url = "http://www.scetc.cn/" + path
        response = requests.get(url, headers=HEADERS)
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'lxml')
        cells = soup.select('div[class="flat-wrapper"] table tr td')
        # .string is None for cells containing nested markup -- behavior kept.
        employment = [cell.string for cell in cells]
        print(employment)
        # Presumably odd indices hold values and even ones hold labels --
        # confirm the cell layout before enabling:
        # add(employment[1], employment[3], employment[5], employment[7], employment[9], employment[11])


if __name__ == '__main__':
    data_store()
Python数据爬取,并存储到mysql数据库
猜你喜欢
转载自blog.csdn.net/weixin_57803787/article/details/124873903
今日推荐
周排行