电影管理网站-第一章 抓取

最近为了提升技术,我写了一个简单的电影链接网站,主要分为三部分:

第一个:网站

点击打开链接

第二个:后台管理

点击打开链接

账号:ag  密码:test@123

第三个:抓取服务

本文重点介绍抓取服务。目前只抓取了两个电影网站的部分信息(仅供技术学习与开发使用)。


现在直接上代码:

Program

using Autofac;
using Autofac.Builder;
using OA.Common.DtoModel;
using Ohye.Film.Application;
using Ohye.Film.Domain;
using Ohye.Film.Infrastructure;
using Ohye.Film.Infrastructure.EFRepositories;
using Ohye.Film.Infrastructure.EFRepositories.UnitOfWork;
using Ohye.Film.Service.Spider;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;

namespace Ohye.Film.Service
{
    /// <summary>
    /// Console entry point of the crawling service. It configures the container, starts the
    /// spiders once at start-up and then polls the clock every ten minutes to run the two
    /// daily jobs: the crawl and the home-page request.
    /// </summary>
    class Program
    {
        /// <summary>Local hour of the day at which the daily crawl is started.</summary>
        private const int CrawlHour = 4;

        /// <summary>Local hour of the day at which the home page is requested.</summary>
        private const int HomePageHour = 6;

        /// <summary>Address requested once a day by <see cref="RefreshHomePage"/>.</summary>
        private const string HomePageUrl = "http://film.ohyewang.com/";

        /// <summary>Pause between two checks of the clock.</summary>
        private static readonly TimeSpan PollInterval = TimeSpan.FromMinutes(10);

        /// <summary>
        /// Single client reused for every home-page request; creating a new client per request
        /// leaves sockets behind.
        /// </summary>
        private static readonly System.Net.Http.HttpClient HomePageClient = new System.Net.Http.HttpClient();

        /// <summary>
        /// Initializes the application, starts one crawl immediately and then loops forever,
        /// starting the crawl at <see cref="CrawlHour"/> and requesting the home page at
        /// <see cref="HomePageHour"/>, each at most once per calendar day.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Init();
            SpireFilms();

            // Only the most recent date has to be remembered: the date never goes backwards,
            // so keeping every past date in a list only grows memory.
            string lastCrawlDate = null;
            string lastHomePageDate = null;

            while (true)
            {
                // One clock snapshot so that date and hour cannot disagree around midnight.
                var now = DateTime.Now;
                var date = now.ToString("yyyyMMdd");

                if (now.Hour == CrawlHour && lastCrawlDate != date)
                {
                    SpireFilms();
                    lastCrawlDate = date;
                }

                // The day is only marked as done when the request succeeded, so a failure is
                // retried on the next poll while the hour still matches.
                if (now.Hour == HomePageHour && lastHomePageDate != date && RefreshHomePage())
                {
                    lastHomePageDate = date;
                }

                System.Threading.Thread.Sleep(PollInterval);
            }
        }

        /// <summary>
        /// Requests <see cref="HomePageUrl"/> and waits for the response.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the server answered with a success status code; <c>false</c> when
        /// the request failed, timed out or returned an error status.
        /// </returns>
        private static bool RefreshHomePage()
        {
            try
            {
                // Blocking is acceptable here: Main is synchronous and a console application
                // has no synchronization context that could deadlock.
                using (var response = HomePageClient.GetAsync(HomePageUrl).GetAwaiter().GetResult())
                {
                    response.EnsureSuccessStatusCode();
                }

                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine($"生成首页成功:{DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss")}");
                Console.ForegroundColor = ConsoleColor.Gray;
                return true;
            }
            catch (Exception ex) when (ex is System.Net.Http.HttpRequestException || ex is TaskCanceledException)
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine($"生成首页失败:{ex.Message}");
                Console.ForegroundColor = ConsoleColor.Gray;
                return false;
            }
        }

        /// <summary>
        /// Starts one spider per configured list page. Each entry is
        /// (list page URL, year, country); the year and country are copied onto the spider.
        /// </summary>
        private static void SpireFilms()
        {
            var pageList = new List<Tuple<string, int, string>>
            {
                Tuple.Create("http://list.iqiyi.com/www/1/2-----------2017--11-1-1-iqiyi--.html", 2017, "美国"),
                Tuple.Create("http://list.iqiyi.com/www/1/2-----------2016--11-1-1-iqiyi--.html", 2016, "美国"),
                Tuple.Create("http://list.iqiyi.com/www/1/2-----------2015--11-1-1-iqiyi--.html", 2015, "美国"),

                Tuple.Create("http://list.iqiyi.com/www/1/1-----------2017--11-1-1-iqiyi--.html", 2017, "华语"),
                Tuple.Create("http://list.iqiyi.com/www/1/1-----------2016--11-1-1-iqiyi--.html", 2016, "华语"),
                Tuple.Create("http://list.iqiyi.com/www/1/1-----------2015--11-1-1-iqiyi--.html", 2015, "华语"),
            };

            foreach (var page in pageList)
            {
                ISplider spider = new AIQIYI { Url = page.Item1, Year = page.Item2, Country = page.Item3 };
                spider.SpliderResult();
            }

            var pageListMGTV = new List<Tuple<string, int, string>>
            {
                Tuple.Create("https://list.mgtv.com/3/a4-537193-------2835073-2-1--a1-.html?channelId=3", 0, "美国"),
                Tuple.Create("https://list.mgtv.com/3/a4-49-------2835073-2-1--a1-.html?channelId=3", 0, "华语"),
            };

            foreach (var page in pageListMGTV)
            {
                ISplider spider = new mgtv { Url = page.Item1, Year = page.Item2, Country = page.Item3 };
                spider.SpliderResult();
            }

            // The qq spider is currently disabled. To enable it, run a `qq` spider the same way with:
            //   ("http://film.qq.com/film_all_list/allfilm.html?type=movie&sort=5", 0, "美国")
        }

        /// <summary>
        /// Registers the AutoMapper mappings and the container registrations.
        /// </summary>
        static void Init()
        {
            AutoMapperConfig.RegisterMappings();

            var builder = IocCenter.ContainerBuilder;
            SetupResolveRules(builder);
        }

        /// <summary>
        /// Adds the registrations needed by the service to <paramref name="builder"/>: the
        /// current user, the entity manager, every type of the application assembly whose name
        /// ends with "Service", the database context, the unit of work and the generic repository.
        /// </summary>
        /// <param name="builder">Container builder that receives the registrations.</param>
        static void SetupResolveRules(ContainerBuilder builder)
        {
            var application = Assembly.Load("Ohye.Film.Application");
            builder.Register<OAUser>(c => CreateOAUser()).AsSelf();
            builder.RegisterType<EntityManager>().AsSelf().SingleInstance();

            builder.RegisterAssemblyTypes(application)
              .Where(t => t.Name.EndsWith("Service"))
              .AsSelf().InstancePerDependency();

            builder.RegisterType<OhyeFilmDbContext>().InstancePerLifetimeScope();
            builder.RegisterType<UnitOfWork>().As<IUnitOfWork>().InstancePerDependency();
            builder.RegisterGeneric(typeof(Repository<>)).As(typeof(IRepository<>)).InstancePerDependency();
        }

        /// <summary>
        /// Creates the user that is registered in the container for this service.
        /// </summary>
        /// <returns>A user with empty ids, named "系统管理员" in department "总部".</returns>
        static OAUser CreateOAUser()
        {
            return new OAUser
            {
                EmplID = "",
                EmplName = "系统管理员",
                DeptID = "",
                DeptName = "总部",
            };
        }
    }
}


Config

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Ohye.Film.Service
{
    /// <summary>
    /// Directory settings of the crawling service.
    /// </summary>
    public class Config
    {
        /// <summary>
        /// Value of the "DataDir" entry of the application settings.
        /// </summary>
        public static string DataDir => System.Configuration.ConfigurationManager.AppSettings["DataDir"];

        /// <summary>
        /// Path obtained by appending "FilmTemp" directly to <see cref="DataDir"/> (no separator
        /// is inserted). The directory is created on access when it does not exist yet.
        /// </summary>
        public static string TempDir
        {
            get
            {
                string tempPath = DataDir + "FilmTemp";
                if (System.IO.Directory.Exists(tempPath))
                {
                    return tempPath;
                }

                System.IO.Directory.CreateDirectory(tempPath);
                return tempPath;
            }
        }
    }
}


SpireClient

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Net;
using System.Net.Http;
using HtmlAgilityPack;

namespace Ohye.Film.Service.Spider
{
    /// <summary>
    /// Small HTTP/HTML helper used by the spiders: downloads pages, raises
    /// <see cref="Complete"/> with the downloaded HTML and selects nodes with XPath.
    /// </summary>
    public class SpireClient
    {
        /// <summary>
        /// One client shared by all instances. A spider creates a new <see cref="SpireClient"/>
        /// for every page, so a client per instance would leave a socket behind for each page.
        /// </summary>
        private static readonly HttpClient SharedHttpClient = new HttpClient();

        /// <summary>
        /// URLs already requested through <see cref="SpireUrl"/> on this instance. The list used
        /// to be a static field that every constructor replaced, so it was effectively
        /// per-instance already, but could be swapped while another thread was using it.
        /// </summary>
        private readonly HashSet<string> _spiredUrls = new HashSet<string>();

        /// <summary>Guards <see cref="_spiredUrls"/>; handlers run on thread-pool threads.</summary>
        private readonly object _gate = new object();

        /// <summary>
        /// Raised when a page requested with <see cref="SpireUrl"/> has been downloaded.
        /// The sender is this client and the argument is the page content.
        /// </summary>
        public event EventHandler<string> Complete;

        public SpireClient()
        {
        }

        /// <summary>
        /// Downloads the content of <paramref name="url"/> as a string.
        /// </summary>
        /// <param name="url">Address to download.</param>
        /// <returns>The response body.</returns>
        public async Task<string> GetHtml(string url)
        {
            return await SharedHttpClient.GetStringAsync(url);
        }

        /// <summary>
        /// Starts downloading <paramref name="url"/> and returns immediately; when the download
        /// finishes <see cref="Complete"/> is raised. A URL that was already requested on this
        /// instance is ignored.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        public void SpireUrl(string url)
        {
            lock (_gate)
            {
                if (!_spiredUrls.Add(url))
                {
                    return;
                }
            }

            // Fire-and-forget by contract; FetchAndNotifyAsync handles its own failures.
            FetchAndNotifyAsync(url);
        }

        /// <summary>
        /// Downloads <paramref name="url"/> and raises <see cref="Complete"/>. Every failure
        /// (request error, error status, exception thrown by a handler) is written to the
        /// console, because nobody observes the returned task.
        /// </summary>
        private async Task FetchAndNotifyAsync(string url)
        {
            try
            {
                string html;
                using (HttpResponseMessage response = await SharedHttpClient.GetAsync(url).ConfigureAwait(false))
                {
                    if (!response.IsSuccessStatusCode)
                    {
                        Console.ForegroundColor = ConsoleColor.Red;
                        Console.WriteLine($"抓取失败:{url} ({(int)response.StatusCode})");
                        Console.ForegroundColor = ConsoleColor.Gray;
                        return;
                    }

                    html = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
                }

                OnGetResult(this, html);
            }
            catch (Exception ex)
            {
                // Top-level boundary of a fire-and-forget operation: report instead of losing it.
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine($"抓取或处理失败:{url} {ex.Message}");
                Console.ForegroundColor = ConsoleColor.Gray;
            }
        }

        /// <summary>Raises <see cref="Complete"/> when at least one handler is attached.</summary>
        private void OnGetResult(object sender, string e)
        {
            Complete?.Invoke(sender, e);
        }

        /// <summary>
        /// Parses <paramref name="content"/> as HTML and returns the nodes matching an XPath
        /// expression.
        /// </summary>
        /// <param name="content">HTML text to parse.</param>
        /// <param name="regex">
        /// XPath expression (despite the parameter name, it is passed to the document's
        /// <c>SelectNodes</c>, not used as a regular expression).
        /// </param>
        /// <returns>The matching nodes; an empty list when nothing matches.</returns>
        public List<HtmlNode> SelectNodes(string content, string regex)
        {
            HtmlDocument htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(content);
            var htmlNodes = htmlDoc.DocumentNode.SelectNodes(regex);
            if (htmlNodes == null)
            {
                return new List<HtmlNode>();
            }
            return htmlNodes.ToList();
        }
    }
}


HttpImage

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Net.Http;
using System.IO;
using OA.Infrastructure;

namespace Ohye.Film.Service.Spider
{
    /// <summary>
    /// Downloads images over HTTP and stores them through <c>MongoContext.Mongo.SaveFile</c>.
    /// </summary>
    public class HttpImage
    {
        /// <summary>
        /// One client shared by all downloads; creating a client per call leaves sockets behind.
        /// </summary>
        private static readonly HttpClient SharedHttpClient = new HttpClient();

        /// <summary>
        /// Downloads the image at <paramref name="url"/>, writes it to a temporary file under
        /// <c>Config.TempDir</c>, saves that file with <c>MongoContext.Mongo.SaveFile</c> and
        /// removes the temporary file again.
        /// </summary>
        /// <param name="url">Address of the image; the text after the last '/' becomes the file name.</param>
        /// <returns>The id returned by <c>SaveFile</c>.</returns>
        /// <exception cref="AggregateException">The download failed.</exception>
        public string GetImg(string url)
        {
            // The method is synchronous by contract, so the download is awaited by blocking.
            // .Result is kept so that download failures still surface as AggregateException.
            byte[][] images = DownloadPicAsync(new string[] { url }).Result;

            string imageName = url.Substring(url.LastIndexOf('/') + 1);
            string filePath = Path.Combine(Config.TempDir, imageName);
            try
            {
                // WriteAllBytes truncates an existing file. FileMode.OpenOrCreate did not, so a
                // longer file left over from a failed run produced an image with trailing garbage.
                File.WriteAllBytes(filePath, images[0]);
                Console.WriteLine("成功下载图片:" + imageName);

                return MongoContext.Mongo.SaveFile(filePath);
            }
            finally
            {
                // Remove the temporary file even when saving fails.
                File.Delete(filePath);
            }
        }

        /// <summary>
        /// Downloads several images concurrently (asynchronous I/O, one request per URL).
        /// </summary>
        /// <param name="urls">Addresses of the images.</param>
        /// <returns>The downloaded bytes, in the same order as <paramref name="urls"/>.</returns>
        public async Task<byte[][]> DownloadPicAsync(IEnumerable<string> urls)
        {
            Task<byte[]>[] downloads = urls.Select(u => SharedHttpClient.GetByteArrayAsync(u)).ToArray();
            return await Task.WhenAll(downloads);
        }
    }
}


AIQIYI

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Ohye.Film.DTO.Film;
using Ohye.Film.Application.Film;
using Autofac;
using Ohye.Film.Infrastructure.Enums;
using Ohye.Film.Infrastructure;

namespace Ohye.Film.Service.Spider
{
    /// <summary>
    /// Spider for list pages whose next-page links live under http://list.iqiyi.com/. Every list
    /// entry is stored through <see cref="ProductService"/>, then the next page is crawled.
    /// </summary>
    public class AIQIYI : ISplider
    {

        public AIQIYI()
        {
        }

        /// <summary>Address of the list page to crawl; replaced with the next page while paging.</summary>
        public string Url { get; set; }

        /// <summary>Year stored on every film found by this spider.</summary>
        public int Year { get; set; }

        /// <summary>Country stored on every film found by this spider.</summary>
        public string Country { get; set; }

        /// <summary>
        /// Starts downloading <see cref="Url"/>; the page is processed when the download completes.
        /// </summary>
        public void SpliderResult()
        {
            SpireClient spireClient = new SpireClient();
            spireClient.Complete += SpireClient_Complete;
            spireClient.SpireUrl(Url);
        }

        /// <summary>
        /// Handles a downloaded list page: processes every entry, then follows the next page.
        /// </summary>
        private void SpireClient_Complete(object sender, string html)
        {
            SpireClient client = (SpireClient)sender;

            foreach (var item in client.SelectNodes(html, "//ul[contains(@class,'site-piclist')]/li"))
            {
                SpireProduct(client, item);
            }

            SpireNextPage(client, html);
        }

        /// <summary>
        /// Parses one list entry and adds or updates the film. Parsing happens inside the
        /// try block so that a malformed entry only fails itself instead of aborting the rest
        /// of the page and the paging.
        /// </summary>
        private void SpireProduct(SpireClient client, HtmlAgilityPack.HtmlNode item)
        {
            string productName = null;
            try
            {
                var linkPic = client.SelectNodes(item.InnerHtml, "//div[@class='site-piclist_pic']/a").SingleOrDefault();
                var linkInfo = client.SelectNodes(item.InnerHtml, "//div[@class='site-piclist_info']").SingleOrDefault();
                if (linkPic == null || linkInfo == null)
                {
                    Console.WriteLine("跳过无法解析的条目");
                    return;
                }

                productName = linkPic.Attributes["title"]?.Value.Trim();
                var detailURL = linkPic.Attributes["href"]?.Value.Trim();
                var imgUrl = client.SelectNodes(linkPic.InnerHtml, "//img").SingleOrDefault()?.Attributes["src"]?.Value.Trim();
                if (string.IsNullOrEmpty(productName) || string.IsNullOrEmpty(detailURL) || string.IsNullOrEmpty(imgUrl))
                {
                    Console.WriteLine($"跳过无法解析的条目:{productName}");
                    return;
                }

                // An entry carrying this marker is stored as not playable.
                bool canRead = !client.SelectNodes(linkPic.InnerHtml, "//p[@class='viedo_lt ']").Any();
                var duration = client.SelectNodes(linkPic.InnerHtml, "//span[@class='icon-vInfo']").SingleOrDefault()?.InnerText.Trim();
                var score = client.SelectNodes(linkInfo.InnerHtml, "//span[@class='score']").SingleOrDefault()?.InnerText.Trim();
                var authors = client.SelectNodes(linkInfo.InnerHtml, "//div[@class='role_info']/em/a").SelectMany(x => x.Attributes).Where(x => x.Name == "title").Select(x => x.Value).ToList();

                IocCenter.Resolve<ProductService>(_productService =>
                {
                    if (!_productService.CheckExisted(productName))
                    {
                        // The detail page is only needed for the introduction of a new film,
                        // so it is not downloaded for films that already exist.
                        var detailHtml = client.GetHtml(detailURL).GetAwaiter().GetResult();
                        var introduction = client.SelectNodes(detailHtml, "//span[@id='data-videoInfoDes']").SingleOrDefault()?.InnerText.Trim();

                        // An unparsable duration leaves dur at TimeSpan.Zero.
                        TimeSpan dur;
                        TimeSpan.TryParse(duration, out dur);

                        FM_ProductDTO product = new FM_ProductDTO
                        {
                            ID = Guid.NewGuid(),
                            Name = productName,
                            // Fixed category id used for every crawled film.
                            CategoryID = Guid.Parse("d012fcc6-b25a-447c-b079-95cc293a3f92"),
                            Year = Year,
                            Score = ParseScore(score),
                            Duration = dur,
                            CanRead = canRead,
                            ImageID = null,
                            IsDeleted = false,
                            Country = Country,
                            Content = new FM_ContentDTO
                            {
                                ID = Guid.NewGuid(),
                                Introduction = introduction,
                                ReadCount = 0,
                                DownLoadCount = 0
                            },
                            LinkList = canRead ? CreatePlayLinks(detailURL) : new List<FM_LinkDTO>(),
                            AuthorList = authors.Select(x => new FM_AuthorDTO
                            {
                                ID = Guid.NewGuid(),
                                AuhorType = AuhorType.Main,
                                Name = x
                            }).ToList()
                        };
                        product.ImageID = new HttpImage().GetImg(imgUrl);
                        _productService.Add(product);

                        Console.ForegroundColor = ConsoleColor.DarkGreen;
                        Console.WriteLine(productName);
                        Console.ForegroundColor = ConsoleColor.Gray;
                    }
                    else if (canRead)
                    {
                        var productInfo = _productService.CheckCanRead(productName);
                        if (!productInfo.Item1)
                        {
                            Console.ForegroundColor = ConsoleColor.Green;
                            Console.WriteLine($"发现新可播放电影:{productName}");
                            Console.ForegroundColor = ConsoleColor.Gray;

                            // The film exists but had no playable link yet: store the link now.
                            _productService.UpdateLink(productInfo.Item2, CreatePlayLinks(detailURL));
                        }
                    }
                    else
                    {
                        Console.WriteLine($"已存在:{productName}");
                    }
                });
            }
            catch (Exception ex)
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine(productName + ex.Message + ex.InnerException);
                Console.WriteLine("failed");
                Console.ForegroundColor = ConsoleColor.Gray;
            }
        }

        /// <summary>
        /// Finds the pager link whose data-key equals the current page number plus one and
        /// crawls it. Links with a missing or non-numeric data-key (such as "up"/"down") are ignored.
        /// </summary>
        private void SpireNextPage(SpireClient client, string html)
        {
            var currentPage = client.SelectNodes(html, "//div[@class='mod-page']/span[@class='curPage']").FirstOrDefault();
            int pageIndex;
            if (currentPage == null || !int.TryParse(currentPage.InnerText, out pageIndex))
            {
                return;
            }

            var nextPageIndex = pageIndex + 1;
            foreach (var link in client.SelectNodes(html, "//div[@class='mod-page']/a[@data-search-page='item']"))
            {
                int key;
                var href = link.Attributes["href"]?.Value;
                if (href != null && int.TryParse(link.Attributes["data-key"]?.Value, out key) && key == nextPageIndex)
                {
                    Url = $"http://list.iqiyi.com/{href}";
                    SpliderResult();
                    return;
                }
            }
        }

        /// <summary>
        /// Parses a score such as "8.5" with the invariant culture; returns 0 when the text is
        /// missing or not a number.
        /// </summary>
        private static decimal ParseScore(string text)
        {
            decimal value;
            return decimal.TryParse(text, System.Globalization.NumberStyles.Number, System.Globalization.CultureInfo.InvariantCulture, out value)
                ? value
                : 0m;
        }

        /// <summary>Creates the single approved play link pointing at <paramref name="detailURL"/>.</summary>
        private static List<FM_LinkDTO> CreatePlayLinks(string detailURL)
        {
            return new List<FM_LinkDTO>
            {
                new FM_LinkDTO
                {
                    ID = Guid.NewGuid(),
                    Address = detailURL,
                    AuditStatus = AuditStatus.AuditPass,
                    AuditTime = DateTime.Now,
                    LinkType = LinkType.PlayUrl
                }
            };
        }
    }
}


mgtv

using Ohye.Film.Application.Film;
using Ohye.Film.DTO.Film;
using Ohye.Film.Infrastructure;
using Ohye.Film.Infrastructure.Enums;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Ohye.Film.Service.Spider
{


    /// <summary>
    /// Spider for list pages whose next-page links live under https://list.mgtv.com/. Every list
    /// entry is stored through <see cref="ProductService"/>, then the next page is crawled.
    /// </summary>
    public class mgtv : ISplider
    {

        public mgtv()
        {
        }

        /// <summary>Address of the list page to crawl; replaced with the next page while paging.</summary>
        public string Url { get; set; }

        /// <summary>Year stored on every film found by this spider.</summary>
        public int Year { get; set; }

        /// <summary>Country stored on every film found by this spider.</summary>
        public string Country { get; set; }

        /// <summary>
        /// Starts downloading <see cref="Url"/>; the page is processed when the download completes.
        /// </summary>
        public void SpliderResult()
        {
            SpireClient spireClient = new SpireClient();
            spireClient.Complete += SpireClient_Complete;
            spireClient.SpireUrl(Url);
        }

        /// <summary>
        /// Handles a downloaded list page: processes every entry, then follows the next page.
        /// </summary>
        private void SpireClient_Complete(object sender, string html)
        {
            SpireClient client = (SpireClient)sender;

            foreach (var item in client.SelectNodes(html, "//ul/li[contains(@class,'m-result-list-item')]"))
            {
                SpireProduct(client, item);
            }

            SpireNextPage(client, html);
        }

        /// <summary>
        /// Parses one list entry and adds or updates the film. Parsing happens inside the
        /// try block so that a malformed entry only fails itself instead of aborting the rest
        /// of the page and the paging.
        /// </summary>
        private void SpireProduct(SpireClient client, HtmlAgilityPack.HtmlNode item)
        {
            string productName = null;
            try
            {
                var linkPic = client.SelectNodes(item.InnerHtml, "//a[contains(@class,'u-video u-video-y')]").SingleOrDefault();
                var titleLink = client.SelectNodes(item.InnerHtml, "//a[@class='u-title']").FirstOrDefault();
                if (linkPic == null || titleLink == null)
                {
                    Console.WriteLine("跳过无法解析的条目");
                    return;
                }

                productName = titleLink.InnerHtml.Trim();
                var detailURL = ToHttpsUrl(linkPic.Attributes["href"]?.Value.Trim());
                var imgUrl = ToHttpsUrl(client.SelectNodes(linkPic.InnerHtml, "//img[@class='u-pic']").SingleOrDefault()?.Attributes["src"]?.Value.Trim());
                if (string.IsNullOrEmpty(productName) || string.IsNullOrEmpty(detailURL) || string.IsNullOrEmpty(imgUrl))
                {
                    Console.WriteLine($"跳过无法解析的条目:{productName}");
                    return;
                }

                // An entry carrying this marker is stored as not playable.
                bool canRead = !client.SelectNodes(linkPic.InnerHtml, "//i[@class='mark-v']").Any();
                var score = client.SelectNodes(linkPic.InnerHtml, "//em[@class='u-meta']").SingleOrDefault()?.InnerText.Trim();
                var authors = client.SelectNodes(item.InnerHtml, "//span[@class='u-desc']/a").SelectMany(x => x.Attributes).Where(x => x.Name == "title").Select(x => x.Value).ToList();

                IocCenter.Resolve<ProductService>(_productService =>
                {
                    if (!_productService.CheckExisted(productName))
                    {
                        // The detail page is only needed for the introduction of a new film,
                        // so it is not downloaded for films that already exist.
                        var detailHtml = client.GetHtml(detailURL).GetAwaiter().GetResult();
                        var introduction = client.SelectNodes(detailHtml, "//p[@class='u-meta-intro']/span[@class='details']").FirstOrDefault()?.InnerText.Trim();

                        FM_ProductDTO product = new FM_ProductDTO
                        {
                            ID = Guid.NewGuid(),
                            Name = productName,
                            // Fixed category id used for every crawled film.
                            CategoryID = Guid.Parse("d012fcc6-b25a-447c-b079-95cc293a3f92"),
                            Year = Year,
                            Score = ParseScore(score),
                            // No duration is read from this list page.
                            Duration = TimeSpan.Zero,
                            CanRead = canRead,
                            ImageID = null,
                            IsDeleted = false,
                            Country = Country,
                            Content = new FM_ContentDTO
                            {
                                ID = Guid.NewGuid(),
                                Introduction = introduction,
                                ReadCount = 0,
                                DownLoadCount = 0
                            },
                            LinkList = canRead ? CreatePlayLinks(detailURL) : new List<FM_LinkDTO>(),
                            AuthorList = authors.Select(x => new FM_AuthorDTO
                            {
                                ID = Guid.NewGuid(),
                                AuhorType = AuhorType.Main,
                                Name = x
                            }).ToList()
                        };
                        product.ImageID = new HttpImage().GetImg(imgUrl);
                        _productService.Add(product);

                        Console.ForegroundColor = ConsoleColor.DarkGreen;
                        Console.WriteLine(productName);
                        Console.ForegroundColor = ConsoleColor.Gray;
                    }
                    else if (canRead)
                    {
                        var productInfo = _productService.CheckCanRead(productName);
                        if (!productInfo.Item1)
                        {
                            Console.ForegroundColor = ConsoleColor.Green;
                            Console.WriteLine($"发现新可播放电影:{productName}");
                            Console.ForegroundColor = ConsoleColor.Gray;

                            // The film exists but had no playable link yet: store the link now.
                            _productService.UpdateLink(productInfo.Item2, CreatePlayLinks(detailURL));
                        }
                    }
                    else
                    {
                        Console.WriteLine($"已存在:{productName}");
                    }
                });
            }
            catch (Exception ex)
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine(productName + ex.Message + ex.InnerException);
                Console.WriteLine("failed");
                Console.ForegroundColor = ConsoleColor.Gray;
            }
        }

        /// <summary>
        /// Finds the pager link whose text equals the current page number plus one and crawls
        /// it. Links without href or with non-numeric text (such as "...") are ignored.
        /// </summary>
        private void SpireNextPage(SpireClient client, string html)
        {
            var pages = client.SelectNodes(html, "//div[contains(@class,'w-pages w-pages-default')]/ul/li/a");

            var currentPage = pages.FirstOrDefault(p => p.Attributes["class"] != null && p.Attributes["class"].Value == "current");
            int pageIndex;
            if (currentPage == null || !int.TryParse(currentPage.InnerText, out pageIndex))
            {
                return;
            }

            var nextPageIndex = pageIndex + 1;
            foreach (var link in pages)
            {
                int number;
                var href = link.Attributes["href"]?.Value;
                if (href != null && int.TryParse(link.InnerText, out number) && number == nextPageIndex)
                {
                    Url = $"https://list.mgtv.com/{href}";
                    SpliderResult();
                    return;
                }
            }
        }

        /// <summary>
        /// Turns a protocol-relative address ("//host/path") into an https address. Any other
        /// value, including null, is returned unchanged; blindly cutting the first two
        /// characters would corrupt an address that is already absolute.
        /// </summary>
        private static string ToHttpsUrl(string url)
        {
            if (url != null && url.StartsWith("//", StringComparison.Ordinal))
            {
                return "https:" + url;
            }
            return url;
        }

        /// <summary>
        /// Parses a score such as "8.5" with the invariant culture; returns 0 when the text is
        /// missing, empty or not a number.
        /// </summary>
        private static decimal ParseScore(string text)
        {
            decimal value;
            return decimal.TryParse(text, System.Globalization.NumberStyles.Number, System.Globalization.CultureInfo.InvariantCulture, out value)
                ? value
                : 0m;
        }

        /// <summary>Creates the single approved play link pointing at <paramref name="detailURL"/>.</summary>
        private static List<FM_LinkDTO> CreatePlayLinks(string detailURL)
        {
            return new List<FM_LinkDTO>
            {
                new FM_LinkDTO
                {
                    ID = Guid.NewGuid(),
                    Address = detailURL,
                    AuditStatus = AuditStatus.AuditPass,
                    AuditTime = DateTime.Now,
                    LinkType = LinkType.PlayUrl
                }
            };
        }
    }
}


qq

using Ohye.Film.Application.Film;
using Ohye.Film.DTO.Film;
using Ohye.Film.Infrastructure;
using Ohye.Film.Infrastructure.Enums;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Ohye.Film.Service.Spider
{
    /// <summary>
    /// Spider for list pages rendered as <c>ul.figures_list</c>. Every list entry is stored
    /// through <see cref="ProductService"/>, then the next page is crawled.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the introduction selector and the pager selectors are identical to the
    /// ones used by the iqiyi spider and look copied from it — verify them against the real
    /// page markup before enabling this spider.
    /// </remarks>
    public class qq : ISplider
    {

        public qq()
        {
        }

        /// <summary>Address of the list page to crawl; replaced with the next page while paging.</summary>
        public string Url { get; set; }

        /// <summary>Year stored on every film found by this spider.</summary>
        public int Year { get; set; }

        /// <summary>Country stored on every film found by this spider.</summary>
        public string Country { get; set; }

        /// <summary>
        /// Starts downloading <see cref="Url"/>; the page is processed when the download completes.
        /// </summary>
        public void SpliderResult()
        {
            SpireClient spireClient = new SpireClient();
            spireClient.Complete += SpireClient_Complete;
            spireClient.SpireUrl(Url);
        }

        /// <summary>
        /// Handles a downloaded list page: processes every entry, then follows the next page.
        /// </summary>
        private void SpireClient_Complete(object sender, string html)
        {
            SpireClient client = (SpireClient)sender;

            foreach (var item in client.SelectNodes(html, "//ul[@class='figures_list']/li"))
            {
                SpireProduct(client, item);
            }

            SpireNextPage(client, html);
        }

        /// <summary>
        /// Parses one list entry and adds or updates the film. Parsing happens inside the
        /// try block so that a malformed entry only fails itself instead of aborting the rest
        /// of the page and the paging.
        /// </summary>
        private void SpireProduct(SpireClient client, HtmlAgilityPack.HtmlNode item)
        {
            string productName = null;
            try
            {
                var linkPic = client.SelectNodes(item.InnerHtml, "//a").SingleOrDefault();
                var linkInfo = client.SelectNodes(item.InnerHtml, "//div[@class='figure_title_score']").SingleOrDefault();
                if (linkPic == null || linkInfo == null)
                {
                    Console.WriteLine("跳过无法解析的条目");
                    return;
                }

                productName = linkPic.Attributes["title"]?.Value.Trim();
                var detailURL = linkPic.Attributes["href"]?.Value.Trim();
                var imgUrl = client.SelectNodes(linkPic.InnerHtml, "//img").SingleOrDefault()?.Attributes["src"]?.Value.Trim();
                if (string.IsNullOrEmpty(productName) || string.IsNullOrEmpty(detailURL) || string.IsNullOrEmpty(imgUrl))
                {
                    Console.WriteLine($"跳过无法解析的条目:{productName}");
                    return;
                }

                // An entry carrying this marker is stored as not playable.
                bool canRead = !client.SelectNodes(linkPic.InnerHtml, "//i[@class='mark_v ']").Any();

                // The score is assembled from two elements as "<first>.<second>".
                // NOTE(review): the two class names ('score_l' / 'score_2') are inconsistent —
                // one of them may be a typo; confirm against the page markup.
                var scoreFirst = client.SelectNodes(linkInfo.InnerHtml, "//div[@class='figure_score']/em[@class='score_l']").SingleOrDefault()?.InnerText.Trim();
                var scoreSecond = client.SelectNodes(item.InnerHtml, "//div[@class='figure_score']/em[@class='score_2']").SingleOrDefault()?.InnerText.Trim();
                var score = scoreFirst + "." + scoreSecond;

                // No authors are read from this list page.
                var authors = new List<string>();

                IocCenter.Resolve<ProductService>(_productService =>
                {
                    if (!_productService.CheckExisted(productName))
                    {
                        // The detail page is only needed for the introduction of a new film,
                        // so it is not downloaded for films that already exist.
                        var detailHtml = client.GetHtml(detailURL).GetAwaiter().GetResult();
                        var introduction = client.SelectNodes(detailHtml, "//span[@id='data-videoInfoDes']").SingleOrDefault()?.InnerText.Trim();

                        FM_ProductDTO product = new FM_ProductDTO
                        {
                            ID = Guid.NewGuid(),
                            Name = productName,
                            // Fixed category id used for every crawled film.
                            CategoryID = Guid.Parse("d012fcc6-b25a-447c-b079-95cc293a3f92"),
                            Year = Year,
                            Score = ParseScore(score),
                            // No duration is read from this list page.
                            Duration = TimeSpan.Zero,
                            CanRead = canRead,
                            ImageID = null,
                            IsDeleted = false,
                            Country = Country,
                            Content = new FM_ContentDTO
                            {
                                ID = Guid.NewGuid(),
                                Introduction = introduction,
                                ReadCount = 0,
                                DownLoadCount = 0
                            },
                            LinkList = canRead ? CreatePlayLinks(detailURL) : new List<FM_LinkDTO>(),
                            AuthorList = authors.Select(x => new FM_AuthorDTO
                            {
                                ID = Guid.NewGuid(),
                                AuhorType = AuhorType.Main,
                                Name = x
                            }).ToList()
                        };
                        product.ImageID = new HttpImage().GetImg(imgUrl);
                        _productService.Add(product);

                        Console.ForegroundColor = ConsoleColor.DarkGreen;
                        Console.WriteLine(productName);
                        Console.ForegroundColor = ConsoleColor.Gray;
                    }
                    else if (canRead)
                    {
                        var productInfo = _productService.CheckCanRead(productName);
                        if (!productInfo.Item1)
                        {
                            Console.ForegroundColor = ConsoleColor.Green;
                            Console.WriteLine($"发现新可播放电影:{productName}");
                            Console.ForegroundColor = ConsoleColor.Gray;

                            // The film exists but had no playable link yet: store the link now.
                            _productService.UpdateLink(productInfo.Item2, CreatePlayLinks(detailURL));
                        }
                    }
                    else
                    {
                        Console.WriteLine($"已存在:{productName}");
                    }
                });
            }
            catch (Exception ex)
            {
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine(productName + ex.Message + ex.InnerException);
                Console.WriteLine("failed");
                Console.ForegroundColor = ConsoleColor.Gray;
            }
        }

        /// <summary>
        /// Finds the pager link whose data-key equals the current page number plus one and
        /// crawls it. The link is resolved against the current <see cref="Url"/>; it used to be
        /// appended to the iqiyi host, which would have sent this spider to another site.
        /// </summary>
        private void SpireNextPage(SpireClient client, string html)
        {
            var currentPage = client.SelectNodes(html, "//div[@class='mod-page']/span[@class='curPage']").FirstOrDefault();
            int pageIndex;
            if (currentPage == null || !int.TryParse(currentPage.InnerText, out pageIndex))
            {
                return;
            }

            Uri currentUri;
            if (!Uri.TryCreate(Url, UriKind.Absolute, out currentUri))
            {
                return;
            }

            var nextPageIndex = pageIndex + 1;
            foreach (var link in client.SelectNodes(html, "//div[@class='mod-page']/a[@data-search-page='item']"))
            {
                int key;
                Uri nextUri;
                var href = link.Attributes["href"]?.Value;
                if (href != null
                    && int.TryParse(link.Attributes["data-key"]?.Value, out key)
                    && key == nextPageIndex
                    && Uri.TryCreate(currentUri, href, out nextUri))
                {
                    Url = nextUri.AbsoluteUri;
                    SpliderResult();
                    return;
                }
            }
        }

        /// <summary>
        /// Parses a score such as "8.5" with the invariant culture; returns 0 when the text is
        /// missing or not a number.
        /// </summary>
        private static decimal ParseScore(string text)
        {
            decimal value;
            return decimal.TryParse(text, System.Globalization.NumberStyles.Number, System.Globalization.CultureInfo.InvariantCulture, out value)
                ? value
                : 0m;
        }

        /// <summary>Creates the single approved play link pointing at <paramref name="detailURL"/>.</summary>
        private static List<FM_LinkDTO> CreatePlayLinks(string detailURL)
        {
            return new List<FM_LinkDTO>
            {
                new FM_LinkDTO
                {
                    ID = Guid.NewGuid(),
                    Address = detailURL,
                    AuditStatus = AuditStatus.AuditPass,
                    AuditTime = DateTime.Now,
                    LinkType = LinkType.PlayUrl
                }
            };
        }
    }
}


ISplider

using Ohye.Film.DTO.Film;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Ohye.Film.Service.Spider
{
    /// <summary>
    /// Contract of a spider: it is given the address of a page and can be asked to crawl it.
    /// </summary>
    public interface ISplider
    {
        /// <summary>
        /// Address of the page to crawl.
        /// </summary>
        string Url { get; set; }

        /// <summary>
        /// Starts crawling the page at <see cref="Url"/>. The method returns nothing; what is
        /// done with the crawled content is up to the implementation.
        /// </summary>
        void SpliderResult();
    }
}

后续....

感兴趣的可以加入下面群



猜你喜欢

转载自blog.csdn.net/xiaoxionglove/article/details/78995483