HtmlAgilityPack类说明---Parse解析Html文件

对于Html文件的解析

名称 描述
来自文件 从文件中加载HTML
来自字符串 从字符串中加载HTML
来自网络 从网络资源中加载HTML
来自浏览器 从浏览器中加载HTML

从文件中加载HTML

/// <summary>
/// Loads <c>test.html</c> by path and prints the outer HTML of its <c>body</c> element.
/// </summary>
public static void Main()
{
	var path = @"test.html";

	var doc = new HtmlDocument();
	doc.Load(path);

	// SelectSingleNode returns null when the XPath matches nothing
	// (for example an HTML fragment without <body>), so guard before dereferencing.
	var node = doc.DocumentNode.SelectSingleNode("//body");
	if (node == null)
	{
		Console.Error.WriteLine("No <body> element found in " + path);
		return;
	}

	Console.WriteLine(node.OuterHtml);
}

从字符串中加载HTML

// Markup to parse, supplied inline as a verbatim string.
var markup = @"<!DOCTYPE html>
<html>
<body>
	<h1>This is <b>bold</b> heading</h1>
	<p>This is <u>underlined</u> paragraph</p>
	<h2>This is <i>italic</i> heading</h2>
</body>
</html> ";

// Build the document from the in-memory string.
var document = new HtmlDocument();
document.LoadHtml(markup);

// Look up <body> via XPath and print it including its own tags.
Console.WriteLine(document.DocumentNode.SelectSingleNode("//body").OuterHtml);

从网络中加载HTML

// Address of the page to download; this is a URL, not markup.
var url = @"http://html-agility-pack.net/";

// HtmlWeb.Load takes the URL and returns the document queried below.
HtmlWeb web = new HtmlWeb();
var htmlDoc = web.Load(url);

// SelectSingleNode returns null when nothing matches (e.g. a page without <title>),
// so guard before reading Name/OuterHtml.
var node = htmlDoc.DocumentNode.SelectSingleNode("//head/title");
if (node == null)
{
	Console.Error.WriteLine("No <title> element found at " + url);
}
else
{
	Console.WriteLine("Node Name: " + node.Name + "\n" + node.OuterHtml);
}

从浏览器中加载HTML

using System.Windows.Forms;

// Demo page whose "uiDynamicText" div is filled in by script after load.
// The host needs its ".net" TLD; without it the name cannot be resolved.
string url = "http://html-agility-pack.net/from-browser";

// Variant 1: the predicate receives the WebBrowser control and inspects its live DOM.
var web1 = new HtmlWeb();
var doc1 = web1.LoadFromBrowser(url, o =>
{
	var webBrowser = (WebBrowser) o;

	// Document, or the element itself, may not be available yet;
	// report "not ready" instead of throwing NullReferenceException.
	var browserDocument = webBrowser.Document;
	if (browserDocument == null)
	{
		return false;
	}

	var dynamicElement = browserDocument.GetElementById("uiDynamicText");

	// WAIT until the dynamic text is set
	return dynamicElement != null && !string.IsNullOrEmpty(dynamicElement.InnerText);
});
var t1 = doc1.DocumentNode.SelectSingleNode("//div[@id='uiDynamicText']").InnerText;

// Variant 2: the predicate receives the current page HTML as a string.
var web2 = new HtmlWeb();
var doc2 = web2.LoadFromBrowser(url, html =>
{
	// WAIT until the dynamic text is set
	return !html.Contains("<div id=\"uiDynamicText\"></div>");
});
var t2 = doc2.DocumentNode.SelectSingleNode("//div[@id='uiDynamicText']").InnerText;

Console.WriteLine("Text 1: " + t1);
Console.WriteLine("Text 2: " + t2);

猜你喜欢

转载自blog.csdn.net/Day_and_Night_2017/article/details/89842727