PHP正则获取HTML里需要的数据

                                                                    PHP正则获取HTML里需要的数据

1、使用的函数:

  • preg_match():只匹配一次,返回第一个匹配结果
  • preg_match_all():匹配并返回所有结果

2、代码:

/**
 * Demo action: pulls selected data out of an HTML fragment with preg_match_all().
 *
 * Three independent patterns are run against the markup returned by
 * getHtml(); the three result arrays are dumped inside a <pre> element and
 * the script is then terminated with die.
 *
 *  - $match  : [1] data-code attribute values, [2] visible link text
 *  - $match1 : [1] text of every <span> that contains no nested tag
 *  - $match2 : [1] href values, [2] inner HTML of every <a> that has an href
 *
 * @return void Never actually returns; execution stops at die.
 */
public function regexAction(){
	// To run against a live page instead, fetch it with file_get_contents($url).
	$text = $this->getHtml();

	// Anchors carrying a quoted data-code attribute.
	//  - No literal space in front of "<a": requiring one skipped every anchor
	//    that is preceded by a tab, a newline or another tag.
	//  - [^"]* keeps the capture inside the attribute's own quotes.
	//  - Group 2 takes the whole inner HTML and is reduced to plain text below;
	//    the former "[\s\S]*?([^<>]*)" tail only captured the whitespace left
	//    between a nested closing tag and </a>.
	// Variant: drop the quotes around group 1 and use ([^\s>]+) to accept
	// unquoted values as well; the capture then includes any quotes.
	preg_match_all('#<a\b[^>]*\bdata-code="([^"]*)"[^>]*>([\s\S]*?)</a>#i', $text, $match);
	if (isset($match[2])) {
		// Strip nested tags and surrounding whitespace so only the label remains.
		$match[2] = array_map('trim', array_map('strip_tags', $match[2]));
	}

	// Text content of <span> elements that hold no nested markup.
	preg_match_all('#<span>([^<]*)</span>#', $text, $match1);

	// Every <a> element that has an href attribute. The "s" modifier lets
	// "." cross line breaks; without it anchors whose content spans several
	// lines were never matched.
	preg_match_all('#<a\b[^>]*?\shref="([^"]*)"[^>]*>(.*?)</a>#is', $text, $match2);

	echo '<pre>';
	print_r($match);
	print_r($match1);
	print_r($match2);
	echo '</pre>';die;
}

/**
 * Returns the hard-coded HTML fragment used as sample input for the regex demo.
 *
 * The fragment is three single-line anchors with an empty href, immediately
 * followed by a series of <dl> groups (one per index letter in <dt>). Each
 * group lists courier links; every such link has an href, an id, a data-code
 * attribute and a <span> label, and is spread over several lines.
 *
 * The tabs and line breaks inside the literal are part of the returned value.
 *
 * @return string The sample markup.
 */
function getHtml(){
	// Plain anchors: no data-code attribute, content on a single line.
	$plainLinks = '<a href="">A标签1哦</a>'
		. '<a href="">A标签2哦</a>'
		. '<a href="">A标签3哦</a>';

	// Courier directory grouped by index letter.
	$courierDirectory = '<dl class="dl-bg">
			<dt>A</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/axd.shtml" id="a-anxindakuaixi" data-code="anxindakuaixi">
					<span>安信达</span>
				</a>
				<a href="https://www.kuaidi100.com/all/auspost.shtml" id="a-auspost" data-code="auspost" style="width:110px;">
					<span>澳大利亚邮政</span>
				</a>
			</dd>
		</dl>
		<dl>
			<dt>B</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/htky.shtml" id="a-huitongkuaidi2" data-code="huitongkuaidi">
					<span>百世快递</span>
				</a>
				<a href="https://www.kuaidi100.com/all/yzgn.shtml" id="a-youzhengguonei2" data-code="youzhengguonei">
					<span>包裹平邮</span>
				</a>
				<a href="https://www.kuaidi100.com/all/bangsongwuliu.shtml" id="a-bangsongwuliu" data-code="bangsongwuliu">
					<span>邦送物流</span>
				</a>
			</dd>
		</dl>
		<dl class="dl-bg">
			<dt>D</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/dhl.shtml" id="a-dhl" data-code="dhl">
					<span>DHL快递</span>
				</a>
				<a href="https://www.kuaidi100.com/all/dtwl.shtml" id="a-datianwuliu" data-code="datianwuliu">
					<span>大田物流</span>
				</a>
				<a href="https://www.kuaidi100.com/all/dbwl.shtml" id="a-debangwuliu2" data-code="debangwuliu">
					<span>德邦快递</span>
				</a>
			</dd>
		</dl>
		<dl>
			<dt>E</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/ems.shtml" id="a-ems2" title="EMS国内" data-code="ems">
					<span>EMS国内</span>
				</a>
				<a href="https://www.kuaidi100.com/all/emsguoji.shtml" id="a-emsguoji" title="EMS国际" data-code="emsguoji">
					<span>EMS国际</span>
				</a>
				<a href="https://www.kuaidi100.com/all/ems.shtml" id="a-ems3" title="E邮宝" data-code="ems">
					<span>E邮宝</span>
				</a>
			</dd>
		</dl>
		<dl class="dl-bg">
			<dt>F</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/rufengda.shtml" id="a-fedex2" title="凡客配送" data-code="rufengda">
					<span>凡客配送</span>
				</a>
				<a href="https://www.kuaidi100.com/all/fedex.shtml" id="a-fedex" title="fedex快递查询" data-code="fedex" style="width:110px;">
					<span>FedEx(国际件)</span>
				</a>
			</dd>
		</dl>
		<dl>
			<dt>G</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/guotongkuaidi.shtml" id="a-guotongkuaidi" data-code="guotongkuaidi">
					<span>国通快递</span>
				</a>
				<a href="https://www.kuaidi100.com/all/yzgj.shtml" id="a-youzhengguonei" data-code="youzhengguonei">
					<span>挂号信</span>
				</a>
				<a href="https://www.kuaidi100.com/all/gongsuda.shtml" id="a-gongsuda" data-code="gongsuda">
					<span>共速达</span>
				</a>
				<a href="https://www.kuaidi100.com/all/yzgj.shtml" id="a-youzhengguoji" data-code="youzhengguoji">
					<span>国际小包</span>
				</a>
			</dd>
		</dl>
		<dl class="dl-bg">
			<dt>H</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/tdhy.shtml" id="a-tiandihuayu2" data-code="tiandihuayu">
					<span>华宇物流</span>
				</a>
			</dd>
		</dl>
		<dl>
			<dt>J</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/jjwl.shtml" id="a-jiajiwuliu" title="佳吉物流" data-code="jiajiwuliu">
					<span>佳吉快运</span>
				</a>
				<a href="https://www.kuaidi100.com/all/jywl.shtml" id="a-jiayiwuliu" title="佳怡物流查询" data-code="jiayiwuliu">
					<span>佳怡物流</span>
				</a>
				<a href="https://www.kuaidi100.com/all/canpost.shtml" id="a-canpost" data-code="canpost">
					<span>加拿大邮政</span>
				</a>
			</dd>
		</dl>
		<dl class="dl-bg">
			<dt>K</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/kjkd.shtml" id="a-kuaijiesudi" title="快捷快递" data-code="kuaijiesudi">
					<span>快捷速递</span>
				</a>
			</dd>
		</dl>
		<dl>
			<dt>L</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/lb.shtml" id="a-longbanwuliu" title="龙邦速递" data-code="longbanwuliu">
					<span>龙邦速递</span>
				</a>
				<a href="https://www.kuaidi100.com/all/lianb.shtml" id="a-lianbangkuaidi" title="联邦快递查询" data-code="lianbangkuaidi">
					<span>联邦快递</span>
				</a>
				<a href="https://www.kuaidi100.com/all/lhtwl.shtml" id="a-lianhaowuliu" title="联昊通物流" data-code="lianhaowuliu">
					<span>联昊通</span>
				</a>
			</dd>
		</dl>
		<dl class="dl-bg">
			<dt>N</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/ndkd.shtml" id="a-ganzhongnengda" data-code="ganzhongnengda">
					<span>能达速递</span>
				</a>
			</dd>
		</dl>
		<dl class="dl-bg">
			<dt>Q</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/qy.shtml" id="a-quanyikuaidi" data-code="quanyikuaidi">
					<span>全一快递</span>
				</a>
				<a href="https://www.kuaidi100.com/all/qjt.shtml" id="a-quanfengkuaidi" title="全峰快递单号查询"  data-code="quanfengkuaidi">
					<span>全峰快递</span>
				</a>
				<a href="https://www.kuaidi100.com/all/qrt.shtml" id="a-quanritongkuaidi" data-code="quanritongkuaidi">
					<span>全日通</span>
				</a>
			</dd>
		</dl>
		<dl>
			<dt>S</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/st.shtml" id="a-shentong2" title="申通快递单号查询" data-code="shentong">
					<span>申通快递</span>
				</a>
				<a href="https://www.kuaidi100.com/all/sf.shtml" id="a-shunfeng2" title="顺丰快递查询" data-code="shunfeng">
					<span>顺丰快递</span>
				</a>
				<a href="https://www.kuaidi100.com/all/sewl.shtml" id="a-suer" title="速尔物流" data-code="suer">
					<span>速尔快递</span>
				</a>
			</dd>
		</dl>
		<dl class="dl-bg">
			<dt>T</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/tnt.shtml" id="a-tnt" data-code="tnt">
					<span>TNT快递</span>
				</a>
				<a href="https://www.kuaidi100.com/all/tt.shtml" id="a-tiantian2" title="海航天天快递" data-code="tiantian">
					<span>天天快递</span>
				</a>
				<a href="https://www.kuaidi100.com/all/tdhy.shtml" id="a-tiandihuayu" data-code="tiandihuayu">
					<span>天地华宇</span>
				</a>
			</dd>
		</dl>
		<dl>
			<dt>U</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/ups.shtml" id="a-ups" data-code="ups">
					<span>UPS快递</span>
				</a>
				<a href="https://www.kuaidi100.com/all/usps.shtml" id="a-usps" data-code="usps" style="width:120px;">
					<span>USPS(美国邮政)</span>
				</a>
			</dd>
		</dl>
		<dl class="dl-bg">
			<dt>X</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/xbwl.shtml" id="a-xinbangwuliu"  data-code="xinbangwuliu">
					<span>新邦物流</span>
				</a>
				<a href="/all/newegg.shtml" id="a-newegg"  data-code="neweggozzo">
					<span>新蛋物流</span>
				</a>
				<a href="/all/hkpost.shtml" id="a-hkpost"  data-code="hkpost">
					<span>中国香港邮政</span>
				</a>
			</dd>
		</dl>
		<dl>
			<dt>Y</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/yt.shtml" id="a-yuantong2" title="圆通快递查询单号"  data-code="yuantong">
					<span>圆通快递</span>
				</a>
				<a href="https://www.kuaidi100.com/all/yd.shtml" id="a-yunda2" title="韵达快递单号查询"  data-code="yunda">
					<span>韵达快递</span>
				</a>
				<a href="https://www.kuaidi100.com/all/yzgn.shtml" id="a-youzhengguonei3" data-code="youzhengguonei">
					<span>邮政包裹</span>
				</a>
				<a href="https://www.kuaidi100.com/all/yskd.shtml" id="a-youshuwuliu" title="优速快递"  data-code="youshuwuliu">
					<span>优速快递</span>
				</a>
			</dd>
		</dl>
		<dl class="dl-bg">
			<dt>Z</dt>
			<dd>
				<a href="https://www.kuaidi100.com/all/zt.shtml" id="a-zhongtong2" data-code="zhongtong">
					<span>中通快递</span>
				</a>
				<a href="https://www.kuaidi100.com/all/ztky.shtml" id="a-zhongtiewuliu"  title="中铁快运" data-code="zhongtiewuliu">
					<span>中铁快运</span>
				</a>
				<a href="https://www.kuaidi100.com/all/zjs.shtml" id="a-zhaijisong2" data-code="zhaijisong">
					<span>宅急送</span>
				</a>
				<a href="https://www.kuaidi100.com/all/zywl.shtml" id="a-zhongyouwuliu" data-code="zhongyouwuliu">
					<span>中邮物流</span>
				</a>
			</dd>
		</dl>';

	return $plainLinks . $courierDirectory;
}

猜你喜欢

转载自blog.csdn.net/qq_36025814/article/details/85338501