Python爬虫之定时给TA发送微信消息（十七）

原创不易，转载前请注明博主的链接地址：Blessy_Zhu https://blog.csdn.net/weixin_42555080
本次代码的环境：
运行平台： Windows
Python版本： Python3.x
IDE： PyCharm

一、前言

你的朋友圈中一定会有那么一些人，让你一直念念不忘：亦或是“你本无意穿堂风，偏偏孤倨引山洪。我是垂眉摆渡翁，却独独偏爱侬”的那个TA；亦或是“衣上辛勤慈母线，箧中珍重故人书。白云红树堪移棹，万壑千峰拟结庐”的母亲；亦或是“落日斜，秋风冷。今夜故人来不来，教人立尽梧桐影”的挚友。俗话说，远亲不如近邻。
对朋友很久没联系
不是因为感情变了
而是因为生活变了
毕竟年龄增长了
肩负的责任重了
身边的牵挂多了……
何不写一个程序，每天给TA发送TA喜欢的、或者是你想对TA说的话呢？今天，就利用Python中的wxpy模块，将你想说的话发给TA吧！
　

二、思路过程

首先，选择一个自己想要发信息的对象——我这里选择的是我的弟弟（微信备注名：老弟），我想要给他发的信息是每天郑州的天气，以及每天分享一个小故事。
对于郑州的天气，可以通过网站中国天气【网址：http://www.weather.com.cn/ 】来访问。
在这里插入图片描述
输入郑州，切换到郑州的天气预报，并且拼接完成即可。
　

这是一个静态网页，就不再具体介绍它的爬取过程了。
接下来，要爬取的是小故事，可以通过网站儿童资源网，【网址： http://www.tom61.com/ertongwenxue/shuiqiangushi/index.html 】来爬取小故事信息。
在这里插入图片描述

随便点开一个，如《幸福王国》，可以看到，这个也是静态网页。具体的分析过程也不再赘述了。
　

接下来就是找到要发送的好友并且发送信息，可以通过Python中的wxpy模块来实现：

from wxpy import *
# Log in to Web WeChat through wxpy.
# NOTE(review): the original comment also said the login state is saved, but
# Bot() is called without any arguments here -- confirm against the wxpy docs.
bot = Bot()
def sendmsg(content):
    """Send ``content`` to the first friend returned by the friend search.

    The lookup goes through the module-level ``bot``. Replace the ``'***'``
    placeholder with the name of the friend who should receive the message.

    Args:
        content: Message text passed to the friend's ``send`` method.
    """
    # The u prefix is kept from the original; it has no effect on Python 3.
    matches = bot.friends().search(u'***')
    recipient = matches[0]
    recipient.send(content)

三、代码实现与结果展示

首先给出爬取小故事的代码：

# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import os
import time

def getHTMLText(url, headers):
    """Download ``url`` and return the decoded page text.

    Args:
        url: Address of the page to fetch.
        headers: HTTP request headers handed to ``requests.get``.

    Returns:
        The response body decoded with the encoding detected from its
        content, or the sentinel string "爬取失败" when the request fails
        (connection error, timeout or an HTTP error status).
    """
    try:
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
    except requests.RequestException as err:
        # Only request failures are expected here; a bare ``except`` would
        # also hide programming errors and swallow Ctrl-C.
        print("爬取失败:", url, err)
        return "爬取失败"
    # Use the encoding guessed from the content rather than the HTTP header.
    r.encoding = r.apparent_encoding
    return r.text

def parsehtml(namelist, urllist, html):
    """Collect story links and titles from one listing page.

    Every ``<a>`` inside the ``<dl class="txt_box">`` element contributes one
    absolute URL and one title. Both are appended in place to the lists
    supplied by the caller.

    Args:
        namelist: List that receives the story titles.
        urllist: List that receives the story URLs.
        html: Markup of a listing page.

    Returns:
        The tuple ``(urllist, namelist)`` -- note that the order is the
        reverse of the parameter order. When the listing container is not
        found, the lists are returned unchanged.
    """
    from urllib.parse import urljoin  # local import keeps this block self-contained

    base = 'http://www.tom61.com/'
    soup = BeautifulSoup(html, 'html.parser')
    box = soup.find('dl', attrs={'class': 'txt_box'})
    # The container is missing when the download failed and ``html`` is only
    # an error placeholder; treat that as "no links" instead of crashing.
    anchors = box.find_all('a') if box is not None else []
    for link in anchors:
        href = link.get('href')
        if not href:
            continue
        # urljoin copes with rooted ("/a/b.html"), relative and absolute hrefs,
        # where plain concatenation would produce "//" or a broken address.
        urllist.append(urljoin(base, href))
        # Fall back to the link text so a missing title never stores None.
        namelist.append(link.get('title') or link.get_text(strip=True))
    print(urllist)
    print(namelist)
    return urllist, namelist

def main(pages=19):
    """Crawl bedtime stories from tom61.com and save one text file per story.

    For every listing page the story links are collected with ``parsehtml``
    and each story page is downloaded with ``getHTMLText``. The text of all
    ``<p>`` elements inside ``<div class="t_news_txt">`` is joined and written
    to ``myshortStories/Day<N>.txt``, with the title on the first line and the
    story text after it.

    Differences from the earlier version:
      * each listing page is parsed into fresh lists, so stories from earlier
        pages are no longer downloaded again on every later page;
      * titles and texts are kept as pairs, so duplicate titles no longer
        overwrite each other and shift the numbering;
      * files are opened in write mode, so a re-run replaces a story instead
        of appending a second copy;
      * pages or stories that cannot be fetched are skipped instead of
        crashing the whole crawl;
      * ``N`` is a running index, so the Day files stay contiguous.

    Args:
        pages: Number of listing pages to crawl, starting with the first one.
            The default of 19 matches the previous hard-coded range.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
               }
    # All stories are stored in a single directory.
    os.makedirs('myshortStories', exist_ok=True)
    saved = 0  # number of stories written so far == index of the next Day file
    for i in range(1, pages + 1):
        # The first listing page has no numeric suffix.
        if i == 1:
            url = 'http://www.tom61.com/ertongwenxue/shuiqiangushi/index.html'
        else:
            url = 'http://www.tom61.com/ertongwenxue/shuiqiangushi/index_'+str(i)+'.html'
        print("正在爬取第%s页的故事链接：" % (i))
        print(url+'\n')
        html = getHTMLText(url, headers)
        if html == "爬取失败":
            # getHTMLText signals a failed download with this sentinel.
            continue
        # parsehtml appends to the lists it is given and returns (urls, names);
        # passing new lists limits the result to the current page.
        page_urls, page_names = parsehtml([], [], html)
        stories = []
        for name, story_url in zip(page_names, page_urls):
            print('已经爬取了'+str(saved + len(stories))+'篇文章')
            story_html = getHTMLText(story_url, headers)
            time.sleep(1)  # pause between requests to go easy on the server
            body = BeautifulSoup(story_html, 'html.parser').find('div', class_='t_news_txt')
            if body is None:
                print('未找到故事正文，跳过：' + story_url)
                continue
            text = ''.join(p.text for p in body.find_all('p'))
            # Drop the full-width double-space paragraph indents.
            stories.append((name, text.replace('\u3000\u3000', '')))
        print("爬取链接完成")
        for name, text in stories:
            textName = 'myshortStories/'+'Day'+str(saved)+'.txt'
            print("正在写入Day"+str(saved)+"故事")
            with open(textName, 'w', encoding='utf-8') as f:
                f.write(name or '')
                f.write('\n')
                f.write(text)
            saved += 1

# Run the crawler only when this file is executed as a script.
if __name__=='__main__':
    main()

结果如下所示：
在这里插入图片描述
随便打开一个爬取好的文章，进行展示：
　

然后是获取天气信息：

def getZZWeatherAndSendMsg():
    """Build today's weather summary from the weather.com.cn forecast page.

    The summary is stored in ``weatherContent``, the name used by the send
    loop shown next in the article and by the full listing at the end.
    This snippet stops after building the text; it neither returns nor sends it.

    Raises:
        urllib.error.URLError: If the forecast page cannot be downloaded.
        AttributeError: If an expected element is missing from the page.
    """
    # Forecast page to read; change it to the URL of the city you want
    # (the message text below says Zhengzhou).
    url = 'http://www.weather.com.cn/weather1d/10118010105A.shtml'
    # The context manager closes the HTTP response once it has been parsed.
    with urlopen(url, timeout=30) as resp:
        soup = BeautifulSoup(resp, 'html.parser')
    # The first <p class="tem"> holds today's temperature.
    tem = soup.find('p', class_="tem").find('span').string
    # Weather condition.
    weather = soup.find('p', class_="wea").string
    # Wind force.
    win = soup.find('p', class_="win").find('span').string
    # Text of <p class="sun sunUp"> (sunrise time, per the original comment).
    sun = soup.find('p', class_="sun sunUp").find('span').string
    # Assemble the message text.
    weatherContent = '郑州今日：' + '\n' + '天气：' + weather + '\n' + '温度：' + tem + '℃' + '\n' + '风力：' + win + '\n' + sun + '\n' + '注意天气变化！！'

然后拼装信息，发送给指定的人：

    # Send one message per iteration: the weather text followed by the story
    # stored in ./myshortStories/Day<i>.txt.
    # NOTE(review): weatherContent is not reassigned inside this loop, so every
    # message presumably repeats the same weather text -- confirm.
    for i in range(100):
        fileName = './myshortStories/'+'Day'+str(i)+'.txt'
        # NOTE(review): the file object is never closed explicitly.
        storyContent = open(fileName,encoding='utf-8').read()
        sentContents = weatherContent+ '\n\n'+'每天给你讲一个小故事，今天的是：'+ '\n\n'+storyContent
        sendblogmsg(sentContents)
        # Send once per day. A Timer-based variant is left disabled below;
        # the loop simply sleeps for 24 hours instead.
        #t = Timer(24*60*60,job)
        #t.start()
        time.sleep(24 * 60 * 60)

最终结果为：

在这里插入图片描述

四、总结

这篇文章主要内容是爬取两个静态网站，分别是《儿童资源网》和《中国天气网》，然后将爬到的数据，截取发送给指定的人。这篇文章就到这里了，欢迎大佬们多批评指正，也欢迎大家积极评论多多交流。
　

附完整代码：

# -*- coding: utf-8 -*-
import requests
import os
import time
from urllib.request import urlopen
from bs4 import BeautifulSoup
from wxpy import *

# Log in to Web WeChat through wxpy.
# NOTE(review): the original comment also said the login state is saved, but
# Bot() is called without any arguments here -- confirm against the wxpy docs.
bot = Bot()
def sendblogmsg(content):
    """Send ``content`` to the first friend returned by the friend search.

    The lookup goes through the module-level ``bot``. Replace the placeholder
    search key with the WeChat remark name of the intended recipient.

    Args:
        content: Message text passed to the friend's ``send`` method.
    """
    # The u prefix is kept from the original; it has no effect on Python 3.
    matches = bot.friends().search(u'**你要发送的对象微信备注名**')
    recipient = matches[0]
    recipient.send(content)

def getHTMLText(url, headers):
    """Download ``url`` and return the decoded page text.

    Args:
        url: Address of the page to fetch.
        headers: HTTP request headers handed to ``requests.get``.

    Returns:
        The response body decoded with the encoding detected from its
        content, or the sentinel string "爬取失败" when the request fails
        (connection error, timeout or an HTTP error status).
    """
    try:
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
    except requests.RequestException as err:
        # Only request failures are expected here; a bare ``except`` would
        # also hide programming errors and swallow Ctrl-C.
        print("爬取失败:", url, err)
        return "爬取失败"
    # Use the encoding guessed from the content rather than the HTTP header.
    r.encoding = r.apparent_encoding
    return r.text

def parsehtml(namelist, urllist, html):
    """Collect story links and titles from one listing page.

    Every ``<a>`` inside the ``<dl class="txt_box">`` element contributes one
    absolute URL and one title. Both are appended in place to the lists
    supplied by the caller.

    Args:
        namelist: List that receives the story titles.
        urllist: List that receives the story URLs.
        html: Markup of a listing page.

    Returns:
        The tuple ``(urllist, namelist)`` -- note that the order is the
        reverse of the parameter order. When the listing container is not
        found, the lists are returned unchanged.
    """
    from urllib.parse import urljoin  # local import keeps this block self-contained

    base = 'http://www.tom61.com/'
    soup = BeautifulSoup(html, 'html.parser')
    box = soup.find('dl', attrs={'class': 'txt_box'})
    # The container is missing when the download failed and ``html`` is only
    # an error placeholder; treat that as "no links" instead of crashing.
    anchors = box.find_all('a') if box is not None else []
    for link in anchors:
        href = link.get('href')
        if not href:
            continue
        # urljoin copes with rooted ("/a/b.html"), relative and absolute hrefs,
        # where plain concatenation would produce "//" or a broken address.
        urllist.append(urljoin(base, href))
        # Fall back to the link text so a missing title never stores None.
        namelist.append(link.get('title') or link.get_text(strip=True))
    print(urllist)
    print(namelist)
    return urllist, namelist

def mylittlestory(pages=1):
    """Crawl bedtime stories from tom61.com and save one text file per story.

    For every listing page the story links are collected with ``parsehtml``
    and each story page is downloaded with ``getHTMLText``. The text of all
    ``<p>`` elements inside ``<div class="t_news_txt">`` is joined and written
    to ``myshortStories/Day<N>.txt``, with the title on the first line and the
    story text after it.

    Differences from the earlier version:
      * each listing page is parsed into fresh lists, so crawling more than
        one page no longer downloads the earlier pages' stories again;
      * titles and texts are kept as pairs, so duplicate titles no longer
        overwrite each other and shift the numbering;
      * files are opened in write mode, so a re-run replaces a story instead
        of appending a second copy;
      * pages or stories that cannot be fetched are skipped instead of
        crashing the whole crawl;
      * ``N`` is a running index, so the Day files stay contiguous.

    Args:
        pages: Number of listing pages to crawl, starting with the first one.
            The default of 1 matches the previous hard-coded range.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
               }
    # All stories are stored in a single directory.
    os.makedirs('myshortStories', exist_ok=True)
    saved = 0  # number of stories written so far == index of the next Day file
    for i in range(1, pages + 1):
        # The first listing page has no numeric suffix.
        if i == 1:
            url = 'http://www.tom61.com/ertongwenxue/shuiqiangushi/index.html'
        else:
            url = 'http://www.tom61.com/ertongwenxue/shuiqiangushi/index_'+str(i)+'.html'
        print("正在爬取第%s页的故事链接：" % (i))
        print(url+'\n')
        html = getHTMLText(url, headers)
        if html == "爬取失败":
            # getHTMLText signals a failed download with this sentinel.
            continue
        # parsehtml appends to the lists it is given and returns (urls, names);
        # passing new lists limits the result to the current page.
        page_urls, page_names = parsehtml([], [], html)
        stories = []
        for name, story_url in zip(page_names, page_urls):
            print('已经爬取了'+str(saved + len(stories))+'篇文章')
            story_html = getHTMLText(story_url, headers)
            time.sleep(1)  # pause between requests to go easy on the server
            body = BeautifulSoup(story_html, 'html.parser').find('div', class_='t_news_txt')
            if body is None:
                print('未找到故事正文，跳过：' + story_url)
                continue
            text = ''.join(p.text for p in body.find_all('p'))
            # Drop the full-width double-space paragraph indents.
            stories.append((name, text.replace('\u3000\u3000', '')))
        print("爬取链接完成")
        for name, text in stories:
            textName = 'myshortStories/'+'Day'+str(saved)+'.txt'
            print("正在写入Day"+str(saved)+"故事")
            with open(textName, 'w', encoding='utf-8') as f:
                f.write(name or '')
                f.write('\n')
                f.write(text)
            saved += 1

def _fetch_weather_content():
    """Download the forecast page and format today's weather summary text."""
    # Forecast page to read; change it to the URL of the city you want
    # (the message text below says Zhengzhou).
    url = 'http://www.weather.com.cn/weather1d/10118010105A.shtml'
    # The context manager closes the HTTP response once it has been parsed.
    with urlopen(url, timeout=30) as resp:
        soup = BeautifulSoup(resp, 'html.parser')
    # The first <p class="tem"> holds today's temperature.
    tem = soup.find('p', class_="tem").find('span').string
    # Weather condition.
    weather = soup.find('p', class_="wea").string
    # Wind force.
    win = soup.find('p', class_="win").find('span').string
    # Text of <p class="sun sunUp"> (sunrise time, per the original comment).
    sun = soup.find('p', class_="sun sunUp").find('span').string
    return '郑州今日：' + '\n' + '天气：' + weather + '\n' + '温度：' + tem + '℃' + '\n' + '风力：' + win + '\n' + sun + '\n' + '注意天气变化！！'


def getZZWeatherAndSendMsg(days=100, interval=24 * 60 * 60):
    """Send the weather summary plus one stored story, once per interval.

    On each round the story in ``./myshortStories/Day<i>.txt`` is read, the
    weather page is fetched again so the forecast is current for that day,
    and both are sent with ``sendblogmsg``. The loop stops early when the
    next story file does not exist.

    Args:
        days: Maximum number of messages to send (default 100, as before).
        interval: Seconds to wait after each message (default one day).

    Raises:
        urllib.error.URLError: If the forecast page cannot be downloaded.
        AttributeError: If an expected element is missing from the page.
    """
    for day in range(days):
        fileName = './myshortStories/'+'Day'+str(day)+'.txt'
        try:
            # ``with`` closes the file; the previous version leaked the handle.
            with open(fileName, encoding='utf-8') as f:
                storyContent = f.read()
        except FileNotFoundError:
            # Fewer stories were crawled than ``days``: nothing left to send.
            print('没有找到故事文件，停止发送：' + fileName)
            break
        # Fetched inside the loop so each day's message carries that day's weather.
        weatherContent = _fetch_weather_content()
        sentContents = weatherContent + '\n\n' + '每天给你讲一个小故事，今天的是：' + '\n\n' + storyContent
        sendblogmsg(sentContents)
        # Wait until the next send.
        time.sleep(interval)

# Script entry point: crawl the stories first, then start the send loop.
if __name__=='__main__':
    mylittlestory()
    getZZWeatherAndSendMsg()