使用爬虫调用有道翻译网站,实现一个简单的翻译接口

import urllib.request
import http.client
import re
from urllib import request,parse

def getTInfo(key, timeout=10):
    """Translate ``key`` via the Youdao web endpoint and return the result text.

    The text is POSTed as a url-encoded form to the Youdao translate URL and
    the first ``"tgt"`` value found in the response body is returned.

    Args:
        key: The text to translate; it is sent as the ``i`` form field.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The first translated segment as a plain string, with the JSON quoting
        and escape sequences decoded.

    Raises:
        IndexError: If the response contains no ``"tgt"`` string field.
        urllib.error.URLError: If the HTTP request fails.
    """
    # Local import so the block does not depend on a file-level ``import json``.
    import json

    # URL captured from the browser's network traffic; it is not the address
    # shown in the browser's address bar.
    url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=null"

    # Full set of request headers.
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "X-Requested-With": "XMLHttpRequest",
        # A User-Agent pool could be plugged in here instead of a fixed value.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    }

    # NOTE(review): "salt" and "sign" are fixed values; presumably copied from
    # one captured browser request - confirm the server accepts them for
    # arbitrary input.
    formdata = {
        "i": key,
        "from": "auto",
        "to": "auto",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": "1511219405946",
        "sign": "f8965f67a1d3eee8a69ddf8ccc5f582b",
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTIME",
        "typoResult": "false",
    }

    # The form must be url-encoded and sent as bytes.
    data = parse.urlencode(formdata).encode('utf-8')
    # Bundle url, body and headers into one Request object for urlopen.
    req = request.Request(url, data, headers, method='POST')
    # The context manager closes the connection even if reading fails.
    with request.urlopen(req, timeout=timeout) as response:
        body = response.read().decode('utf-8')

    # Capture the complete JSON string literal that follows "tgt", so that a
    # "}" or an escaped quote inside the translation does not cut it short.
    match = re.search(r'"tgt"\s*:\s*("(?:[^"\\]|\\.)*")', body)
    if match is None:
        # Same exception type as the former ``info2[0]`` on an empty list,
        # but with enough context to diagnose the failure.
        raise IndexError("no translation found in response: %r" % body[:200])
    # Decoding the literal removes the surrounding quotes and any escapes.
    return json.loads(match.group(1))

# 翻译用户输入的单词
#if __name__ == "__main__":
#    info = input("请输入您要翻译的英文单词:")
#    print(getTInfo(info))

def info(object, spacing=15, collapse=0):
    """
    Print the callable attributes of ``object`` with translated docstrings.

    Every name from ``dir(object)`` whose attribute is callable is printed on
    its own line, followed by its docstring after translation by ``getTInfo``.
    Callables without a docstring are listed with an empty description and
    are not sent to the translator.

    Args:
        object: Any object to inspect (module, class, dictionary, string...).
            The name shadows the builtin but is kept for existing callers.
        spacing: Column width the method name is left-justified to.
        collapse: If truthy, runs of whitespace in the translated text are
            collapsed to single spaces; otherwise the text is left as is.
    """
    def tidy(text):
        # Optionally squeeze multi-line text onto a single line.
        return " ".join(text.split()) if collapse else text

    lines = []
    for name in dir(object):
        member = getattr(object, name)
        if not callable(member):
            continue
        doc = getattr(member, "__doc__", None)
        # A missing docstring would otherwise be url-encoded as the literal
        # text "None" and translated, which is a wasted request and bogus output.
        if isinstance(doc, str) and doc.strip():
            translated = getTInfo(doc)
        else:
            translated = ""
        lines.append("%s %s" % (name.ljust(spacing), tidy(translated)))
    # Output is emitted in one go, after all translations have completed.
    print('\n'.join(lines))

# 测试代码

# 运行出来的结果,当然只能作为参考

提供给像我这样英语不好的同学查方法用

猜你喜欢

转载自blog.csdn.net/wjun0707/article/details/81159286