HtmlAgilityPack+C#做IP代理爬虫(一)

https://blog.csdn.net/tomy2426214836/article/details/89643184
一、查找资料并尽可能多地抓取IP代理数据,并存储到IP代理池
二、从代理池中筛选数据并把有效数据添加到另一张表中,并及时更新
三、定时更新IP代理池
1、查找免费的IP代理平台网站
西刺代理
89免费代理
IP海
三一代理
云代理
66免费代理
2、下载插件HtmlAgilityPack
https://download.csdn.net/download/tomy2426214836/11144447
3、创建程序
笔者只实现了IP海的代理IP抓取(下面代码中调用的其余站点抓取方法未在本文给出)

       static void Main(string[] args)
        {
            Task task =Task.Run(new Action( Init));
            
            task.Wait();
            Console.ReadKey();
        }

        public static async void Init()
        {
            try
            {

                await xicaidaili();
                await _89ipdaili();
                await iphaidaili();
                await _31daili();
                await _3366daili();
                await ip_66daili();

            }
            finally { }

        }

        public static async Task iphaidaili()
        {

            //www.mayidaili.com/free/education
            string[] url = new string[] { "http://www.iphai.com/", "http://www.iphai.com/free/ng", "http://www.iphai.com/free/np", "http://www.iphai.com/free/wg", "http://www.iphai.com/free/wp" };
            for (int i = 0; i < url.Length; i++)
            {
                List<string> lists = await GetHtmlIPslListiphai(url[i]);
                if (lists.Count > 0) { Thread thread = new Thread(new ParameterizedThreadStart(Insertintodattabaseiphai)); thread.Start(lists); thread.IsBackground = true;   Console.ForegroundColor = ConsoleColor.Green; Console.WriteLine("Success:  " + url[i]); } else {
                    Console.ForegroundColor = ConsoleColor.White; Console.WriteLine("Fail:  " + url[i]);
                }

            }

           

        }

        private static Task<List<string>> GetHtmlIPslListiphai(string url)
        {
            return Task.Run(() =>
            {
                List<string> lists = new List<string>();

                try
                {
                    HtmlWeb html = new HtmlWeb();
                    html.UserAgent = randomUseragent();
                    HtmlDocument htmlDoc = html.LoadFromWebAsync(url).Result;
                   
                    HtmlNode list = htmlDoc.DocumentNode.SelectSingleNode("//div/table");
                    if (list != null)
                    {
                        lock (list)
                        {
                            HtmlNodeCollection listnode = list.SelectNodes("tr");
                            if (listnode != null)
                            {
                                string str = "";
                                if (listnode == null) return lists;
                                for (int i = 1; i < listnode.Count; i++)
                                {
                                    str = "";
                                    HtmlNodeCollection nodes = listnode[i].SelectNodes("td");

                                    for (int j = 0; j < nodes.Count; j++)
                                    {
                                        if(url== "http://www.iphai.com/")
                                            if (j!=4)
                                            {
                                                str += "'" + nodes[j].InnerText.Trim() + "',";
                                            }
                                            else
                                            {

                                            }
                                        else
                                        str += "'" + nodes[j].InnerText.Trim() + "',";
                                    }
                               
                                if (!string.IsNullOrEmpty(str))
                                    lists.Add(str.Substring(0, str.Length - 1));
                            }
                            }

                            return lists;
                        }
                    }
                    return lists;
                }
                finally { }
          });
        }

        private static  void  Insertintodattabaseiphai(object taskResult)
        {
            string str = "";
            List<string> listss = taskResult as List<string>;
            
            string sqlstr = "";
            string[] sssql;
            for (int i = 0; i < listss.Count; i++)
            {
                sssql = listss[i].Replace("\'", " ").Split(',');
                sqlstr = "if not exists( select * from Tipcloud where ip='" + sssql[0] + "' and port=" + sssql[1] + ")";
                str += sqlstr;
                str += " insert into Tipcloud(ip, port, anonymity, type, position, speed, lastverifytime) values(" + listss[i] + ");";

            }
            if (!string.IsNullOrEmpty(str))
                lock (str)
                {
                    Task task1 = Task.Factory.StartNew(() => SqlHelper.ExecuteNonquery(str, System.Data.CommandType.Text, null));

                }
        }


4、运行效果
在这里插入图片描述
数据库存储
在这里插入图片描述
在这里插入图片描述

发布了54 篇原创文章 · 获赞 40 · 访问量 241万+

猜你喜欢

转载自blog.csdn.net/tomy2426214836/article/details/89527787