python-翻译器

版权声明: https://blog.csdn.net/qq_25233621/article/details/85048398

1.掌握python,爬虫的相关知识

2.开始实践,环境准备(安装 googletrans、requests、beautifulsoup4 库;代码中通过 bs4 导入 BeautifulSoup)

3.选取目标-google,金山词霸

4.上代码

from googletrans import Translator
import requests, re
from bs4 import BeautifulSoup
import sys

# author : Comiii
# 2018/12/4


class Tranlate():
    """Translate text through the ``googletrans`` library.

    The translation request is issued from the constructor and the
    translated string is stored on the instance as ``Result``.

    ``flag`` selects the destination language passed to googletrans:

    * ``1`` -> ``"EN"``
    * ``2`` -> ``"zh-CN"``
    * ``3`` -> ``"ja"``
    * ``4`` -> ``"zh-CN"``

    No source language is passed, so it is left to googletrans' default.
    """

    # Default kept at class level so ``Tranlate.Result`` is always readable.
    Result = ""

    # flag -> destination language code handed to ``Translator.translate``.
    _DEST_BY_FLAG = {1: "EN", 2: "zh-CN", 3: "ja", 4: "zh-CN"}

    def __init__(self, text, flag):
        """Translate ``text`` into the language selected by ``flag``.

        Args:
            text: The text to translate.
            flag: Translation category, one of 1, 2, 3 or 4 (see class
                docstring).

        Raises:
            ValueError: If ``flag`` is not a supported category. The check
                runs before any translator object is created, so an invalid
                flag never triggers a network request.
        """
        try:
            dest = self._DEST_BY_FLAG[flag]
        except (KeyError, TypeError):
            # KeyError: unknown flag; TypeError: unhashable flag value.
            supported = ", ".join(str(k) for k in sorted(self._DEST_BY_FLAG))
            raise ValueError(
                "unsupported flag %r; expected one of: %s" % (flag, supported)
            ) from None

        translator = Translator()
        result = translator.translate(text, dest=dest)
        self.Result = result.text


class Spider():
    """Look up a keyword on iciba.com and scrape the translation text.

    The page is downloaded and parsed from the constructor. After
    construction, ``Result`` holds the raw response text and ``Soup`` holds
    the extracted translation string (empty if nothing was found).
    """

    # Class-level defaults so both attributes are always readable.
    Result = ''
    Soup = ''

    def __init__(self, KWord):
        """Fetch the iciba page for ``KWord`` and extract the translation.

        Args:
            KWord: The word or phrase to look up.

        Any exception raised by ``requests.get`` (timeout, connection
        failure, ...) propagates to the caller.
        """
        # Function-scope import so this class does not rely on the file's
        # import block being changed.
        from urllib.parse import quote

        # Alternative source that was tried before (Youdao):
        #   "http://www.youdao.com/w/" + KWord + "/#keyfrom=dict2.top"
        # iciba uses one URL shape for every kind of lookup. The keyword is
        # percent-encoded so characters such as '?', '#' or '/' stay part of
        # the looked-up term instead of changing the URL structure.
        url = "http://www.iciba.com/" + quote(KWord, safe='')

        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'}
        response = requests.get(url, headers=headers, timeout=60)
        self.Result = response.text
        self.CheckBeautifulsoup()

    def CheckBeautifulsoup(self):
        """Parse ``self.Result`` with BeautifulSoup and fill ``self.Soup``.

        Two page layouts are tried in order:

        1. The translator result box: a ``div`` whose ``style`` attribute
           equals a fixed string.
        2. The dictionary entry list: ``span`` tags inside ``li`` tags
           selected with ``attrs='clearfix'``.

        If the first layout yields any text, the second is not consulted.
        When several candidates match, the last one wins. Tags without any
        text are skipped. ``self.Soup`` is left untouched when nothing
        matches.

        The original author noted that this approach was not always
        effective for Youdao but worked for iciba.
        """
        page = BeautifulSoup(self.Result, "html.parser")

        # Layout 1: translator result box.
        # NOTE(review): the sample markup recorded by the original author
        # used "font-size: 15px", while this filter matches "18px" --
        # confirm against the live page.
        box_style = "width: 580px; margin-top: 15px; font-size: 18px; line-height: 24px; color: #333333;"
        found_in_box = False
        for div in page.find_all(name='div', style=box_style):
            node = div.find(text=True)
            if node is None:
                # Empty box: nothing to extract, keep looking.
                continue
            self.Soup = node.strip()
            found_in_box = True
        if found_in_box:
            return

        # Layout 2: dictionary entry list. Per the Beautiful Soup docs a
        # plain string passed as ``attrs`` is matched against the CSS class.
        for li in page.find_all(name='li', attrs='clearfix'):
            for span in li.find_all(name='span'):
                node = span.find(text=True)
                if node is None:
                    # A span without text must not abort the whole scan.
                    continue
                # Several results may exist; the last one is kept.
                self.Soup = node.strip()

    def CheckRe(self):
        """Placeholder for a regex-based extractor; not implemented.

        Currently only compiles an empty pattern and returns ``None``.
        """
        pattern = re.compile(r'')


if __name__ == "__main__":
    # Demo run: send one sample phrase through both back ends.
    sample = "你好"
    mode = 1  # each flag value selects a different translation category

    # Google, via the googletrans library.
    google_lookup = Tranlate(sample, mode)
    print(google_lookup.Result)

    # iciba scraper. Known issue noted by the original author: the page may
    # contain a result that still does not get displayed.
    iciba_lookup = Spider(sample)
    print(iciba_lookup.Soup)


5.总结:太简单了,一个轻量级网页爬取,没什么好总结的!

猜你喜欢

转载自blog.csdn.net/qq_25233621/article/details/85048398