一个比较完善的httpWebRequest 封装,适合网络爬取及暴力破解

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/cqkxzyi/article/details/82985112

大家在模拟 HTTP 请求的时候,对保持长连接、cookies 以及 HTTP 头部信息等了解得不是那么深入,因此在各种网络请求过程中会发生各种各样的问题。

可能问题如下:

1)登录成功后session保持

2)保证所有cookies回传到服务器

3)http头这么多,少一个,请求可能会失败

4)各种编码问题,gzip等压缩问题

为了解决这些问题,本人花了一天时间写了以下一个类,专门做http请求

复制代码

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

 10 namespace ScanWeb
 11 {
 12     //zetee
 13     //不能Host、Connection、User-Agent、Referer、Range、Content-Type、Content-Length、Expect、Proxy-Connection、If-Modified-Since
 14     //等header. 这些header都是通过属性来设置的 。
 15     public class HttpRequestClient
 16     {
 17         static HashSet<String> UNCHANGEHEADS = new HashSet<string>();
 18         static HttpRequestClient()
 19         {
 20             UNCHANGEHEADS.Add("Host");
 21             UNCHANGEHEADS.Add("Connection");
 22             UNCHANGEHEADS.Add("User-Agent");
 23             UNCHANGEHEADS.Add("Referer");
 24             UNCHANGEHEADS.Add("Range");
 25             UNCHANGEHEADS.Add("Content-Type");
 26             UNCHANGEHEADS.Add("Content-Length");
 27             UNCHANGEHEADS.Add("Expect");
 28             UNCHANGEHEADS.Add("Proxy-Connection");
 29             UNCHANGEHEADS.Add("If-Modified-Since");
 30             UNCHANGEHEADS.Add("Keep-alive");
 31             UNCHANGEHEADS.Add("Accept");
 32 
 33             ServicePointManager.DefaultConnectionLimit = 1000;//最大连接数
 34 
 35         }
 36 
 37         /// <summary>
 38         /// 默认的头
 39         /// </summary>
 40         public static string defaultHeaders = @"Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
 41 Accept-Encoding:gzip, deflate, sdch
 42 Accept-Language:zh-CN,zh;q=0.8
 43 Cache-Control:no-cache
 44 Connection:keep-alive
 45 Pragma:no-cache
 46 Upgrade-Insecure-Requests:1
 47 User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36";
 48 
 49         /// <summary>
 50         /// 是否跟踪cookies
 51         /// </summary>
 52         bool isTrackCookies = false;
 53         /// <summary>
 54         /// cookies 字典
 55         /// </summary>
 56         Dictionary<String, Cookie> cookieDic = new Dictionary<string, Cookie>();
 57 
 58         /// <summary>
 59         /// 平均相应时间
 60         /// </summary>
 61         long avgResponseMilliseconds = -1;
 62 
 63         /// <summary>
 64         /// 平均相应时间
 65         /// </summary>
 66         public long AvgResponseMilliseconds
 67         {
 68             get
 69             {
 70                 return avgResponseMilliseconds;
 71             }
 72 
 73             set
 74             {
 75                 if (avgResponseMilliseconds != -1)
 76                 {
 77                     avgResponseMilliseconds = value + avgResponseMilliseconds / 2;
 78                 }
 79                 else
 80                 {
 81                     avgResponseMilliseconds = value;
 82                 }
 83 
 84             }
 85         }
 86 
 87         public HttpRequestClient(bool isTrackCookies = false)
 88         {
 89             this.isTrackCookies = isTrackCookies;
 90         }
 91         /// <summary>
 92         /// http请求
 93         /// </summary>
 94         /// <param name="url"></param>
 95         /// <param name="method">POST,GET</param>
 96         /// <param name="headers">http的头部,直接拷贝谷歌请求的头部即可</param>
 97         /// <param name="content">content,每个key,value 都要UrlEncode才行</param>
 98         /// <param name="contentEncode">content的编码</param>
 99         /// <param name="proxyUrl">代理url</param>
100         /// <returns></returns>
101         public string http(string url, string method, string headers, string content, Encoding contentEncode, string proxyUrl)
102         {
103             HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
104             request.Method = method;
105             if(method.Equals("GET",StringComparison.InvariantCultureIgnoreCase))
106             {
107                 request.MaximumAutomaticRedirections = 100;
108                 request.AllowAutoRedirect = false;
109             }
110             
111             fillHeaders(request, headers);
112             fillProxy(request, proxyUrl);
113 
114             #region 添加Post 参数  
115             if (contentEncode == null)
116             {
117                 contentEncode = Encoding.UTF8;
118             }
119             if (!string.IsNullOrWhiteSpace(content))
120             {
121                 byte[] data = contentEncode.GetBytes(content);
122                 request.ContentLength = data.Length;
123                 using (Stream reqStream = request.GetRequestStream())
124                 {
125                     reqStream.Write(data, 0, data.Length);
126                     reqStream.Close();
127                 }
128             }
129             #endregion
130 
131             HttpWebResponse response = null;
132             System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();
133             try
134             {
135                 sw.Start();
136                 response = (HttpWebResponse)request.GetResponse();
137                 sw.Stop();
138                 AvgResponseMilliseconds = sw.ElapsedMilliseconds;
139                 CookieCollection cc = new CookieCollection();
140                 string cookieString = response.Headers[HttpResponseHeader.SetCookie];
141                 if (!string.IsNullOrWhiteSpace(cookieString))
142                 {
143                     var spilit = cookieString.Split(';');
144                     foreach (string item in spilit)
145                     {
146                         var kv = item.Split('=');
147                         if (kv.Length == 2)
148                             cc.Add(new Cookie(kv[0].Trim(), kv[1].Trim()));
149                     }
150                 }
151                 trackCookies(cc);
152             }
153             catch (Exception ex)
154             {
155                 sw.Stop();
156                 AvgResponseMilliseconds = sw.ElapsedMilliseconds;
157                 return "";
158             }
159 
160             string result = getResponseBody(response);
161             return result;
162         }
163 
164         /// <summary>
165         /// post 请求
166         /// </summary>
167         /// <param name="url"></param>
168         /// <param name="headers"></param>
169         /// <param name="content"></param>
170         /// <param name="contentEncode"></param>
171         /// <param name="proxyUrl"></param>
172         /// <returns></returns>
173         public string httpPost(string url, string headers, string content, Encoding contentEncode, string proxyUrl = null)
174         {
175             return http(url, "POST", headers, content, contentEncode, proxyUrl);
176         }
177 
178         /// <summary>
179         /// get 请求
180         /// </summary>
181         /// <param name="url"></param>
182         /// <param name="headers"></param>
183         /// <param name="content"></param>
184         /// <param name="proxyUrl"></param>
185         /// <returns></returns>
186         public string httpGet(string url, string headers, string content=null, string proxyUrl=null)
187         {
188             return http(url, "GET", headers, null, null, proxyUrl);
189         }
190 
191         /// <summary>
192         /// 填充代理
193         /// </summary>
194         /// <param name="proxyUri"></param>
195         private void fillProxy(HttpWebRequest request, string proxyUri)
196         {
197             if (!string.IsNullOrWhiteSpace(proxyUri))
198             {
199                 WebProxy proxy = new WebProxy();
200                 proxy.Address = new Uri(proxyUri);
201                 request.Proxy = proxy;
202             }
203         }
204 
205 
206         /// <summary>
207         /// 跟踪cookies
208         /// </summary>
209         /// <param name="cookies"></param>
210         private void trackCookies(CookieCollection cookies)
211         {
212             if (!isTrackCookies) return;
213             if (cookies == null) return;
214             foreach (Cookie c in cookies)
215             {
216                 if (cookieDic.ContainsKey(c.Name))
217                 {
218                     cookieDic[c.Name] = c;
219                 }
220                 else
221                 {
222                     cookieDic.Add(c.Name, c);
223                 }
224             }
225 
226         }
227 
228         /// <summary>
229         /// 格式cookies
230         /// </summary>
231         /// <param name="cookies"></param>
232         private string getCookieStr()
233         {
234             StringBuilder sb = new StringBuilder();
235             foreach (KeyValuePair<string, Cookie> item in cookieDic)
236             {
237                 if (!item.Value.Expired)
238                 {
239                     if (sb.Length == 0)
240                     {
241                         sb.Append(item.Key).Append("=").Append(item.Value.Value);
242                     }
243                     else
244                     {
245                         sb.Append("; ").Append(item.Key).Append(" = ").Append(item.Value.Value);
246                     }
247                 }
248             }
249             return sb.ToString();
250 
251         }
252 
253         /// <summary>
254         /// 填充头
255         /// </summary>
256         /// <param name="request"></param>
257         /// <param name="headers"></param>
258         private void fillHeaders(HttpWebRequest request, string headers, bool isPrint = false)
259         {
260             if (request == null) return;
261             if (string.IsNullOrWhiteSpace(headers)) return;
262             string[] hsplit = headers.Split(new String[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
263             foreach (string item in hsplit)
264             {
265                 string[] kv = item.Split(':');
266                 string key = kv[0].Trim();
267                 string value = string.Join(":", kv.Skip(1)).Trim();
268                 if (!UNCHANGEHEADS.Contains(key))
269                 {
270                     request.Headers.Add(key, value);
271                 }
272                 else
273                 {
274                     #region  设置http头
275                     switch (key)
276                     {
277 
278                         case "Accept":
279                             {
280                                 request.Accept = value;
281                                 break;
282                             }
283                         case "Host":
284                             {
285                                 request.Host = value;
286                                 break;
287                             }
288                         case "Connection":
289                             {
290                                 if (value == "keep-alive")
291                                 {
292                                     request.KeepAlive = true;
293                                 }
294                                 else
295                                 {
296                                     request.KeepAlive = false;//just test
297                                 }
298                                 break;
299                             }
300                         case "Content-Type":
301                             {
302                                 request.ContentType = value;
303                                 break;
304                             }
305 
306                         case "User-Agent":
307                             {
308                                 request.UserAgent = value;
309                                 break;
310                             }
311                         case "Referer":
312                             {
313                                 request.Referer = value;
314                                 break;
315                             }
316 
317                         case "Content-Length":
318                             {
319                                 request.ContentLength = Convert.ToInt64(value);
320                                 break;
321                             }
322                         case "Expect":
323                             {
324                                 request.Expect = value;
325                                 break;
326                             }
327                         case "If-Modified-Since":
328                             {
329                                 request.IfModifiedSince = Convert.ToDateTime(value);
330                                 break;
331                             }
332                         default:
333                             break;
334                     }
335                     #endregion
336                 }
337             }
338             CookieCollection cc = new CookieCollection();
339             string cookieString = request.Headers[HttpRequestHeader.Cookie];
340             if (!string.IsNullOrWhiteSpace(cookieString))
341             {
342                 var spilit = cookieString.Split(';');
343                 foreach (string item in spilit)
344                 {
345                     var kv = item.Split('=');
346                     if (kv.Length == 2)
347                         cc.Add(new Cookie(kv[0].Trim(), kv[1].Trim()));
348                 }
349             }
350             trackCookies(cc);
351             if (!isTrackCookies)
352             {
353                 request.Headers[HttpRequestHeader.Cookie] = "";
354             }
355             else
356             {
357                 request.Headers[HttpRequestHeader.Cookie] = getCookieStr();
358             }
359 
360             #region 打印头
361             if (isPrint)
362             {
363                 for (int i = 0; i < request.Headers.AllKeys.Length; i++)
364                 {
365                     string key = request.Headers.AllKeys[i];
366                     System.Console.WriteLine(key + ":" + request.Headers[key]);
367                 }
368             }
369             #endregion
370 
371         }
372 
373 
374         /// <summary>
375         /// 打印ResponseHeaders
376         /// </summary>
377         /// <param name="response"></param>
378         private void printResponseHeaders(HttpWebResponse response)
379         {
380             #region 打印头
381             if (response == null) return;
382             for (int i = 0; i < response.Headers.AllKeys.Length; i++)
383             {
384                 string key = response.Headers.AllKeys[i];
385                 System.Console.WriteLine(key + ":" + response.Headers[key]);
386             }
387             #endregion
388         }
389 
390 
391         /// <summary>
392         /// 返回body内容
393         /// </summary>
394         /// <param name="response"></param>
395         /// <returns></returns>
396         private string getResponseBody(HttpWebResponse response)
397         {
398             Encoding defaultEncode = Encoding.UTF8;
399             string contentType = response.ContentType;
400             if (contentType != null)
401             {
402                 if (contentType.ToLower().Contains("gb2312"))
403                 {
404                     defaultEncode = Encoding.GetEncoding("gb2312");
405                 }
406                 else if (contentType.ToLower().Contains("gbk"))
407                 {
408                     defaultEncode = Encoding.GetEncoding("gbk");
409                 }
410                 else if (contentType.ToLower().Contains("zh-cn"))
411                 {
412                     defaultEncode = Encoding.GetEncoding("zh-cn");
413                 }
414             }
415 
416             string responseBody = string.Empty;
417             if (response.ContentEncoding.ToLower().Contains("gzip"))
418             {
419                 using (GZipStream stream = new GZipStream(response.GetResponseStream(), CompressionMode.Decompress))
420                 {
421                     using (StreamReader reader = new StreamReader(stream))
422                     {
423                         responseBody = reader.ReadToEnd();
424                     }
425                 }
426             }
427             else if (response.ContentEncoding.ToLower().Contains("deflate"))
428             {
429                 using (DeflateStream stream = new DeflateStream(response.GetResponseStream(), CompressionMode.Decompress))
430                 {
431                     using (StreamReader reader = new StreamReader(stream, defaultEncode))
432                     {
433                         responseBody = reader.ReadToEnd();
434                     }
435                 }
436             }
437             else
438             {
439                 using (Stream stream = response.GetResponseStream())
440                 {
441                     using (StreamReader reader = new StreamReader(stream, defaultEncode))
442                     {
443                         responseBody = reader.ReadToEnd();
444                     }
445                 }
446             }
447             return responseBody;
448         }
449 
450 
451         public static string UrlEncode(string item, Encoding code)
452         {
453             return System.Web.HttpUtility.UrlEncode(item.Trim('\t').Trim(), Encoding.GetEncoding("gb2312"));
454         }
455 
456         public static string UrlEncodeByGB2312(string item)
457         {
458             return UrlEncode(item, Encoding.GetEncoding("gb2312"));
459         }
460 
461 
462         public static string UrlEncodeByUTF8(string item)
463         {
464             return UrlEncode(item, Encoding.GetEncoding("utf-8"));
465         }
466 
467         public static string HtmlDecode(string item)
468         {
469             return WebUtility.HtmlDecode(item.Trim('\t').Trim());
470         }
471  
472     }
473 }

复制代码

使用方式:

1)打开谷歌浏览器,按 F12 打开开发者工具

复制Request Headers 里面的所有内容,然后执行代码:

复制代码

// Request headers pasted from the browser's developer tools, one "Name:value" pair per line.
string heads = @"Accept:text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01
Accept-Encoding:gzip, deflate
Accept-Language:zh-CN,zh;q=0.8
Cache-Control:no-cache
Content-Length:251
Content-Type:application/x-www-form-urlencoded; charset=UTF-8
Cookie:JSESSIONID=B1716F5DAC2F78D1E592F5421D859CFA; Hm_lvt_f44f38cf69626ed8bcfe92d72ed55922=1498099203; Hm_lpvt_f44f38cf69626ed8bcfe92d72ed55922=1498099203; cache_cars=152%7C152%7CBDL212%7C111111%7C111111%2C152%7C152%7CBy769x%7C111111%7C111111%2C152%7C152%7Cd12881%7C111111%7C111111
Host:www.xxxxxxxx.com
Origin:http://www.xxxxxxxx.com
Pragma:no-cache
Proxy-Connection:keep-alive
Referer:http://www.cheshouye.com/api/weizhang/
User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36
X-Requested-With:XMLHttpRequest";

            string url = "http://www.xxxxxxxxxxxx.com/api/weizhang/open_task?callback=jQuery1910816327";
            // Passing true turns on cookie tracking (the isTrackCookies constructor argument).
            HttpRequestClient s = new HttpRequestClient(true);
            // Form body for the POST; the values are already URL-encoded.
            string content = "chepai_no=b21451&chejia_no=111111&engine_no=111111&city_id=152&car_province_id=12&input_cost=0&vcode=%7B%22cookie_str%22%3A%22%22%2C%22verify_code%22%3A%22%22%2C%22vcode_para%22%3A%7B%22vcode_key%22%3A%22%22%7D%7D&td_key=qja5rbl2d97n&car_type=02&uid=0";
            // Sends the body as UTF-8 bytes; the result is the response body text,
            // or an empty string when getting the response fails.
            string response= s.httpPost(url, heads, content, Encoding.UTF8);

复制代码

就这样,你会惊喜地发现:返回来的值和谷歌浏览器上显示的值一模一样。

只要域名没变化,HttpRequestClient 对象就不要去改变, 多线程请使用ThreadLocal<HttpRequestClient > 

配合我很久之前写的多线程类 QueueThreadBase,让你起飞。

你想暴力破解网站登录密码吗?基本思路如下:

1)强大的用户名+密码字典

2)多线程Http+代理(代理可以不用,如果服务器做了ip限制,那么代理就非常有用了,最好是透明的http代理,并且有规则剔除慢的代理)

3)验证码破解.(只要验证码不复杂,在某宝就能买的dll 可用,1000块钱上下)

猜你喜欢

转载自blog.csdn.net/cqkxzyi/article/details/82985112