Python3爬虫Google翻译

googletranslate.js:

// Sample content of googletranslate.js. main.py (Translate.get_tkk) overwrites
// this file on every call with the TKK assignment it extracts from the
// translate page, followed by the "return TKK;" line below, so the numbers
// shown here are only one snapshot.
TKK=eval('((function(){var a\x3d2065549123;var b\x3d-1264353036;return 422288+\x27.\x27+(a+b)})())');
// NOTE(review): a top-level `return` is not valid in a standalone script; this
// presumably relies on execjs wrapping the source in a function body - confirm
// against the execjs runtime in use.
return TKK;

googletranslate_1.js:

/**
 * Mix the integer `a` according to the operation string `b`.
 *
 * `b` is read in groups of three characters:
 *   [0] "+" -> add the shifted value (result truncated to 32 bits),
 *       anything else -> XOR with the shifted value;
 *   [1] "+" -> unsigned right shift, anything else -> left shift;
 *   [2] shift amount: a digit, or a letter >= "a" meaning charCode - 87
 *       (so "a" is 10, "b" is 11, ...).
 * A trailing group of fewer than three characters is ignored.
 *
 * @param {number} a  Value to mix.
 * @param {string} b  Operation string, e.g. "+-a^+6".
 * @returns {number}  The mixed value.
 */
function b(a, b) {
    var ops = b;
    var acc = a;
    for (var pos = 0; pos + 2 < ops.length; pos += 3) {
        var combine = ops.charAt(pos);
        var direction = ops.charAt(pos + 1);
        var amountChar = ops.charAt(pos + 2);

        var amount;
        if (amountChar >= "a") {
            amount = amountChar.charCodeAt(0) - 87;
        } else {
            amount = Number(amountChar);
        }

        var shifted;
        if (direction === "+") {
            shifted = acc >>> amount;
        } else {
            shifted = acc << amount;
        }

        if (combine === "+") {
            // `&` with 4294967295 coerces the sum to a 32-bit integer.
            acc = (acc + shifted) & 4294967295;
        } else {
            acc = acc ^ shifted;
        }
    }
    return acc;
}
    
/**
 * Compute the `tk` request token for the text `a` from the page seed `TKK`.
 *
 * Steps:
 *   1. Split TKK ("<int>.<int>") into its two numeric parts.
 *   2. Encode `a` as UTF-8 bytes (surrogate pairs are combined into one
 *      4-byte sequence).
 *   3. Starting from the first TKK part, add each byte and mix with
 *      b(acc, "+-a^+6"); finish with b(acc, "+-3^+b+-f").
 *   4. XOR with the second TKK part, map a negative result into the
 *      unsigned range, and reduce modulo 1,000,000.
 *
 * Relies on the helper function b(a, ops) defined alongside this function.
 *
 * @param {string} a    Text to be translated.
 * @param {string} TKK  Seed string of the form "<int>.<int>".
 * @returns {string}    Token of the form "<n>.<n XOR firstTkkPart>".
 */
function tk(a, TKK) {
    var e = TKK.split("."),
        h = Number(e[0]) || 0,
        g = [],
        d = 0;

    // UTF-8 encode the input into the byte array g.
    for (var f = 0; f < a.length; f++) {
        var c = a.charCodeAt(f);
        if (128 > c) {
            // 1-byte sequence (ASCII).
            g[d++] = c;
        } else {
            if (2048 > c) {
                // Lead byte of a 2-byte sequence.
                g[d++] = c >> 6 | 192;
            } else {
                if (55296 == (c & 64512) && f + 1 < a.length &&
                        56320 == (a.charCodeAt(f + 1) & 64512)) {
                    // High surrogate followed by a low surrogate: combine
                    // them into one code point and emit the first two bytes
                    // of a 4-byte sequence.
                    c = 65536 + ((c & 1023) << 10) + (a.charCodeAt(++f) & 1023);
                    g[d++] = c >> 18 | 240;
                    g[d++] = c >> 12 & 63 | 128;
                } else {
                    // Lead byte of a 3-byte sequence.
                    g[d++] = c >> 12 | 224;
                }
                g[d++] = c >> 6 & 63 | 128;
            }
            // Final continuation byte, shared by all multi-byte sequences.
            g[d++] = c & 63 | 128;
        }
    }

    // Fold every byte into the accumulator, seeded with the first TKK part.
    var acc = h;
    for (d = 0; d < g.length; d++) {
        acc += g[d];
        acc = b(acc, "+-a^+6");
    }
    acc = b(acc, "+-3^+b+-f");
    acc ^= Number(e[1]) || 0;

    // Reinterpret a negative 32-bit result as its unsigned value.
    if (0 > acc) {
        acc = (acc & 2147483647) + 2147483648;
    }
    acc %= 1E6;
    return acc.toString() + "." + (acc ^ h);
}

main.py:

import requests  
import json  
import sys  
import urllib  
from bs4 import  BeautifulSoup  
import re  
import execjs  
import os  
import numpy as np
  
  
      
class Translate:
    """Translate one string through the web endpoint at translate.google.cn.

    The request needs a ``tk`` token, which is computed by the JavaScript
    function ``tk`` in ``googletranslate_1.js`` from the query string and a
    ``TKK`` seed scraped from the translate home page.

    Args:
        query_string: Text to translate.
        src_lang: Source language code sent as ``sl`` (default ``"es"``).
        dst_lang: Target language code sent as ``tl`` (default ``"fr"``).
    """

    def __init__(self, query_string, src_lang="es", dst_lang="fr"):
        self.api_url = "https://translate.google.cn"
        self.query_string = query_string
        self.src_lang = src_lang
        self.dst_lang = dst_lang
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; rv:53.0) Gecko/20100101 Firefox/53.0"
        }

    def get_url_param_data(self):
        """Return the full GET URL for the translation request.

        Calling this triggers ``get_tk()``, which performs a network request
        and runs JavaScript through execjs.
        """
        url_param_part = self.api_url + "/translate_a/single?"
        # sl is the source language, tl is the target language.
        url_param = (
            url_param_part
            + "client=t&sl=" + str(self.src_lang) + "&tl=" + str(self.dst_lang)
            + "&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t"
            + "&ie=UTF-8&oe=UTF-8&source=btn&ssel=3&tsel=3&kc=0&"
        )
        return url_param + "tk=" + str(self.get_tk()) + "&q=" + str(self.get_query_string())

    def get_query_string(self):
        """Return the query string percent-encoded for use in a URL."""
        # Imported here so the method does not depend on another module having
        # loaded the ``urllib.parse`` submodule as a side effect.
        from urllib.parse import quote

        return quote(self.query_string)

    def get_tkk(self):
        """Fetch the translate home page and evaluate its TKK seed.

        The extracted JavaScript is also written to ``googletranslate.js`` in
        the current working directory.

        Returns:
            The value produced by evaluating the page's TKK expression.

        Raises:
            IndexError: If no TKK assignment is found in the page.
        """
        tkk_page = requests.get(self.api_url, headers=self.headers)
        tkk_code = BeautifulSoup(tkk_page.content, 'lxml')
        pattern = re.compile(r'(TKK.*?\);)', re.I | re.M)
        matches = pattern.findall(str(tkk_code))
        if not matches:
            raise IndexError(
                "TKK assignment not found in the page returned by " + self.api_url
            )
        js_code = matches[0] + "\n" + "return TKK;"
        with open("googletranslate.js", "w") as f:
            f.write(js_code)
        # SECURITY NOTE: this executes JavaScript downloaded from the remote
        # page; only run it against a host you trust.
        # NOTE(review): the trailing "return TKK;" presumably makes the execjs
        # wrapper return before the 'eval' call itself matters - confirm
        # against the execjs runtime in use.
        return execjs.compile(js_code).call('eval')

    def get_tk(self):
        """Compute the ``tk`` token by calling ``tk`` in googletranslate_1.js."""
        with open("googletranslate_1.js") as f:
            js_source = f.read()
        return execjs.compile(js_source).call('tk', self.query_string, self.get_tkk())

    def parse_url(self):
        """Perform the translation request and return the decoded body."""
        response = requests.get(self.get_url_param_data(), headers=self.headers)
        return response.content.decode()

    def get_trans_ret(self, json_response):
        """Return element ``[0][0][0]`` of the parsed JSON response.

        Args:
            json_response: JSON text returned by ``parse_url()``.

        Returns:
            The innermost first element; presumably the translated text of
            the first segment (verify against the live response format).
        """
        dict_response = json.loads(json_response)
        return dict_response[0][0][0]

    def run(self):
        """Translate ``query_string`` and return the extracted result."""
        return self.get_trans_ret(self.parse_url())
         
         
if __name__ == "__main__":
    # Load the input vocabulary; column 1 of each row is the word to translate.
    vocab = np.load('vocabEN-ES.npy')

    # Collect [original word, lower-cased translation] pairs in a plain list and
    # build the array once at the end, instead of re-stacking the whole array on
    # every iteration (quadratic, and np.row_stack is deprecated in NumPy 2.0).
    rows = []
    for count, entry in enumerate(vocab, start=1):
        word = entry[1]
        google = Translate(word.lower())
        rows.append([word, google.run().lower()])
        print('谷歌翻译第%d' % count + '个单词完成!')

    # reshape keeps the (n, 2) layout even when the vocabulary is empty.
    vocab_array = np.array(rows, dtype=str).reshape(-1, 2)
    np.save("vocabES-FR.npy", vocab_array)

    # Reload the saved file and print it as a sanity check.
    a = np.load("vocabES-FR.npy")
    print(a)
    print(a.shape)

    # NOTE: an earlier variant of this script (kept in the original as an unused
    # string literal) read whitespace-separated words from 'test_removed.txt',
    # translated the first 1280 of them the same way, and saved the pairs to
    # "test1000EN-FR.npy".

将以上三个文件放在同一文件夹下,执行 python3 main.py 即可(其中 googletranslate.js 会在运行时由 main.py 重新生成)。

猜你喜欢

转载自blog.csdn.net/xyx_HFUT/article/details/81255414