C# 模拟登录,后台爬数据

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/fuck51cto/article/details/80617689
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using System.Text.RegularExpressions;
using System.Collections;

/*
* author:zvall
*/

namespace ConsoleApplication2
{
    /// <summary>
    /// Console scraper: logs in with a form POST, requests a range of numbered pages with
    /// the resulting cookies, extracts the rows of the "Userlist" HTML table from each page
    /// and appends them as comma-separated lines to an output file.
    /// </summary>
    /// <remarks>
    /// Command line: <c>start_page end_page (ignored) output_file</c>.
    /// URLs of pages that could not be fetched, or that did not contain the expected
    /// marker text, are appended to <c>error_url.txt</c>.
    /// </remarks>
    class Program
    {
        // NOTE(review): endpoint and credentials are hard-coded exactly as in the original
        // sample; move them to configuration before pointing this at a real system.
        private const string LoginUrl = "https://xxxxx.com/admin/login.jsp";
        private const string LoginPostData = "username=admin&password=admin&submit=1";

        // Page URLs are built as this prefix followed by the page number.
        // NOTE(review): the original uses the login URL as the prefix — confirm the real
        // listing URL.
        private const string PageUrlPrefix = LoginUrl;

        // Text that must be present in a fetched page; when it is missing the program
        // logs in again before moving on.
        private const string LoggedInMarker = "用户管理";

        private const string ErrorLogPath = "error_url.txt";
        private const string BrowserUserAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko";
        private const string AcceptHeader = "text/html, application/xhtml+xml, */*";
        private const string FormContentType = "application/x-www-form-urlencoded";

        private const RegexOptions HtmlRegexOptions =
            RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.CultureInvariant | RegexOptions.Singleline;

        // Built once instead of on every page / row / cell.
        private static readonly Regex UserTableRegex = new Regex(
            "<table id=\"Userlist\" cellspacing=\"0\" border=\"1\">(.*)?<td colspan=\"15\" style=\"text-align: right\">",
            HtmlRegexOptions);
        private static readonly Regex RowRegex = new Regex(@"<tr>[\s\S].*?</tr>", HtmlRegexOptions);
        private static readonly Regex CellRegex = new Regex(@"<td>(?<key>[\s\S].*?)</td>", HtmlRegexOptions);
        private static readonly Regex WhitespaceRegex = new Regex("[\\s]+");

        // True once the accept-all certificate callback has been registered.
        private static bool s_certificateBypassInstalled;

        /// <summary>
        /// For https URLs, registers a process-wide callback that accepts every server
        /// certificate. The callback is registered at most once; the original code added
        /// a new delegate to the global callback on every request.
        /// </summary>
        /// <param name="url">URL about to be requested.</param>
        private static void TrustAllCertificatesIfHttps(string url)
        {
            if (s_certificateBypassInstalled
                || url.IndexOf("https://", StringComparison.OrdinalIgnoreCase) < 0)
            {
                return;
            }

            // SECURITY: this disables TLS certificate validation for the whole process
            // (kept from the original behaviour). Remove it once the target host presents
            // a certificate that validates normally.
            ServicePointManager.ServerCertificateValidationCallback +=
                delegate(object sender,
                         X509Certificate certificate,
                         X509Chain chain,
                         SslPolicyErrors sslPolicyErrors)
                {
                    return true;
                };
            s_certificateBypassInstalled = true;
        }

        /// <summary>
        /// POSTs <paramref name="postString"/> as a UTF-8 form body to
        /// <paramref name="postUrl"/> and returns the cookie container that was attached
        /// to the request.
        /// </summary>
        /// <param name="postString">URL-encoded form body.</param>
        /// <param name="postUrl">Address to post to.</param>
        /// <returns>The cookie container used for the request.</returns>
        /// <exception cref="WebException">The request failed.</exception>
        static CookieContainer GetCookie(string postString, string postUrl)
        {
            TrustAllCertificatesIfHttps(postUrl);

            CookieContainer cookie = new CookieContainer();

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(postUrl);
            request.CookieContainer = cookie;
            request.Method = "POST";
            request.KeepAlive = true;
            request.UserAgent = BrowserUserAgent;
            request.Accept = AcceptHeader;
            request.ContentType = FormContentType;

            byte[] body = Encoding.UTF8.GetBytes(postString);
            request.ContentLength = body.Length;
            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(body, 0, body.Length);
            }

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // Only the cookies are of interest. The response must still be closed,
                // otherwise its connection is never released back to the pool.
            }

            return cookie;
        }

        /// <summary>
        /// GETs <paramref name="url"/> using the given cookies and returns the response
        /// body decoded as UTF-8.
        /// </summary>
        /// <param name="cookie">Cookie container to send with the request.</param>
        /// <param name="url">Address to fetch; also sent as the Referer header.</param>
        /// <returns>
        /// The response text, or an empty string when the request fails for any reason
        /// (the failure is reported on standard error).
        /// </returns>
        static string GetContent(CookieContainer cookie, string url)
        {
            try
            {
                TrustAllCertificatesIfHttps(url);

                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.CookieContainer = cookie;
                request.Referer = url;
                request.UserAgent = BrowserUserAgent;
                request.Accept = AcceptHeader;
                request.ContentType = FormContentType;
                request.Method = "GET";

                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream responseStream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (WebException ex)
            {
                Console.Error.WriteLine("GET " + url + " failed (" + ex.Status + "): " + ex.Message);
            }
            catch (Exception ex)
            {
                // Best effort by design: the caller treats an empty result as "page failed".
                Console.Error.WriteLine("GET " + url + " failed: " + ex.GetType().Name + ": " + ex.Message);
            }

            return string.Empty;
        }

        /// <summary>
        /// Extracts the rows of the "Userlist" table from a page.
        /// </summary>
        /// <param name="html">Full page markup.</param>
        /// <returns>
        /// One string per table row that has at least one kept cell: the cell texts with
        /// all whitespace removed, joined by commas. Cells containing "selectRadio" are
        /// dropped. Empty when the table is not found.
        /// </returns>
        private static List<string> ExtractRows(string html)
        {
            List<string> rows = new List<string>();

            Match table = UserTableRegex.Match(html);
            if (!table.Success)
            {
                return rows;
            }

            foreach (Match row in RowRegex.Matches(table.Value))
            {
                List<string> cells = new List<string>();
                foreach (Match cell in CellRegex.Matches(row.Value))
                {
                    string text = WhitespaceRegex.Replace(cell.Groups["key"].Value.Trim(), "");
                    if (text.Contains("selectRadio"))
                    {
                        continue;
                    }
                    cells.Add(text);
                }

                if (cells.Count > 0)
                {
                    // NOTE(review): cell text is not CSV-escaped; a comma inside a cell
                    // will shift columns (unchanged from the original output format).
                    rows.Add(string.Join(",", cells.ToArray()));
                }
            }

            return rows;
        }

        /// <summary>Appends a URL that could not be processed to the error log file.</summary>
        private static void LogFailedUrl(string url)
        {
            File.AppendAllText(ErrorLogPath, url + "\n", Encoding.UTF8);
        }

        /// <summary>Prints the expected command line to standard error.</summary>
        private static void PrintUsage()
        {
            Console.Error.WriteLine("usage: <start_page> <end_page> <ignored> <output_file>");
        }

        /// <summary>
        /// Entry point. Reads the page range from <c>args[0]</c>/<c>args[1]</c> and the
        /// output path from <c>args[3]</c>, logs in, then scrapes every page in the
        /// inclusive range and appends the extracted rows to the output file.
        /// </summary>
        /// <param name="args">Command-line arguments.</param>
        static void Main(string[] args)
        {
            // NOTE(review): the output path is read from index 3 and index 2 is never used.
            // That looks like an off-by-one in the original, but it is kept so existing
            // command lines keep working.
            if (args == null || args.Length < 4)
            {
                PrintUsage();
                return;
            }

            int startPage;
            int endPage;
            if (!int.TryParse(args[0], out startPage) || !int.TryParse(args[1], out endPage))
            {
                PrintUsage();
                return;
            }

            if (startPage > endPage)
            {
                // The original "i != end_page + 1" loop would run almost forever here.
                Console.Error.WriteLine("start_page must not be greater than end_page.");
                return;
            }

            string outputPath = args[3];

            try
            {
                using (FileStream output = File.OpenWrite(outputPath))
                {
                    // Append to whatever the file already contains.
                    output.Seek(0, SeekOrigin.End);

                    CookieContainer cookie = GetCookie(LoginPostData, LoginUrl);

                    // long counter so that end_page == int.MaxValue cannot overflow the loop.
                    for (long page = startPage; page <= endPage; ++page)
                    {
                        string pageUrl = PageUrlPrefix + page;
                        Console.WriteLine(pageUrl);

                        string document = GetContent(cookie, pageUrl);
                        if (string.IsNullOrEmpty(document))
                        {
                            LogFailedUrl(pageUrl);
                            continue;
                        }

                        if (!document.Contains(LoggedInMarker))
                        {
                            // Marker missing: log in again for the following pages and
                            // record this page as failed.
                            cookie = GetCookie(LoginPostData, LoginUrl);
                            LogFailedUrl(pageUrl);
                            continue;
                        }

                        foreach (string row in ExtractRows(document))
                        {
                            byte[] line = Encoding.UTF8.GetBytes(row + "\n");
                            output.Write(line, 0, line.Length);
                        }

                        // Push each page to disk so progress survives a later failure.
                        output.Flush();
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }
    }
}

猜你喜欢

转载自blog.csdn.net/fuck51cto/article/details/80617689