HttpWebRequest 模拟浏览器访问网站

最近抓网页时报错:
要么返回 The remote server returned an error: (442)
要么返回: 非法访问,您的行为已被WAF系统记录!
想了想,应该是对方网站加了防抓取网页的措施(例如 WAF),于是改了一下方法,给请求加上 Request.Headers 之类的请求头就行了。
具体要加哪些请求头,可以先用 Fiddler 抓一下浏览器的请求包,例如:
 
GET http://www.baidu.com/ HTTP/1.1
Host: www.baidu.com
Connection: keep-alive
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate
Accept-Language: zh-CN,zh;q=0.9
  

  

 /// <summary>
 /// Downloads the Baidu home page using browser-like request headers.
 /// </summary>
 /// <returns>The response body decoded as text.</returns>
 /// <exception cref="WebException">The request failed or the server returned an error status.</exception>
 public static string GetHtml()
 {
     return GetHtml("http://www.baidu.com");
 }

 /// <summary>
 /// Downloads the page at <paramref name="url"/> while sending the same kind of
 /// headers a desktop Chrome browser sends (Accept, Accept-Language, User-Agent,
 /// Referer, keep-alive, gzip/deflate), so that simple anti-scraping filters
 /// do not reject the request.
 /// </summary>
 /// <param name="url">Absolute http/https URL of the page to fetch.</param>
 /// <returns>The response body decoded as text.</returns>
 /// <exception cref="ArgumentException"><paramref name="url"/> is null or empty.</exception>
 /// <exception cref="WebException">The request failed or the server returned an error status.</exception>
 public static string GetHtml(string url)
 {
     if (string.IsNullOrEmpty(url))
     {
         throw new ArgumentException("A non-empty URL is required.", "url");
     }

     const int timeoutMilliseconds = 300000;

     HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
     request.Method = "GET";
     request.ProtocolVersion = HttpVersion.Version11;
     request.KeepAlive = true;
     request.Timeout = timeoutMilliseconds;
     request.ReadWriteTimeout = timeoutMilliseconds;

     // The Host header is not set explicitly: HttpWebRequest derives it from the request URI.
     request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";
     request.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
     request.UserAgent = @"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36";
     request.Referer = url;

     // Sends "Accept-Encoding: gzip, deflate" and transparently decompresses the body.
     request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

     // No If-Modified-Since header is sent: a conditional GET stamped with the current
     // time can be answered with 304 Not Modified, which GetResponse() reports as a
     // WebException instead of returning the page.

     // The using blocks release the connection even when reading the body throws.
     using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
     using (Stream body = response.GetResponseStream())
     using (StreamReader reader = new StreamReader(body, ResolveEncoding(response)))
     {
         return reader.ReadToEnd();
     }
 }

 /// <summary>
 /// Chooses the text encoding for a response: the charset declared in its
 /// Content-Type header when one is present and recognized, otherwise gb2312.
 /// </summary>
 private static Encoding ResolveEncoding(HttpWebResponse response)
 {
     string contentType = response.ContentType;

     // Only trust CharacterSet when the header actually names a charset.
     // NOTE(review): CharacterSet appears to report a default value when the header
     // has no charset parameter - confirm against the HttpWebResponse documentation.
     bool declaresCharset = !string.IsNullOrEmpty(contentType)
         && contentType.IndexOf("charset", StringComparison.OrdinalIgnoreCase) >= 0;

     if (declaresCharset && !string.IsNullOrEmpty(response.CharacterSet))
     {
         try
         {
             return Encoding.GetEncoding(response.CharacterSet);
         }
         catch (ArgumentException)
         {
             // Unknown or malformed charset name: fall through to the default below.
         }
     }

     return Encoding.GetEncoding("gb2312");
 }

猜你喜欢

转载自www.cnblogs.com/newr2006/p/9212797.html