# XPath 用法:使用 starts-with() 匹配属性值的开头

# Sample HTML fixture used by the XPath examples in this file: a title
# paragraph, two "story" paragraphs (the first holding three "sister" links
# under http://example.com/), and a short unordered list.
# Each anchor carries its own unique id (link1..link3); HTML ids must not repeat.
html_doc = '''
 <html>
    <head>
        <title>
             The Dormouse's story
        </title>
    </head>
    <body>
        <p class="title">
            <b>
                The Dormouse's story
            </b>
        </p>
        <p class="story">
            Once upon a time there were three little sisters; and their names were
            <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>
            <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>
            and
            <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>
            and they lived at the bottom of a well.
        </p>
        <p class="story">
            测试中文
        </p>
        <ul>
            <li> python </li>
            <li> C# </li>
            <li> java </li>
            <li> .net </li>
        </ul>
    </body>

 </html>
'''
from lxml import etree
# NOTE: html_doc is defined inline in this file; it could equally be imported
# from a separate module that holds the markup to parse. lxml's etree
# (imported above) provides the XPath support used here.
etr = etree.HTML(html_doc)  # parse the markup into an element tree

# XPath walkthrough:
#   //p[@class="story"]
#       every <p> in the document whose class attribute equals "story"
#   /a[starts-with(@href,"http://example.com/")]
#       direct <a> children of those paragraphs whose href begins with the
#       given prefix
# starts-with() can be applied to any string value, e.g.
#   p[starts-with(@class,"s")]
# selects <p> elements whose class attribute *starts with* "s".
#
# The anchors are selected once and both fields are read from each element.
# Running two separate queries (.../@href and .../text()) and pairing the
# results by position would silently mis-pair links as soon as one anchor had
# no text or more than one text node.
_story_links = etr.xpath('//p[@class="story"]/a[starts-with(@href,"http://example.com/")]')
# The name `a_like` (the link URLs) is kept because later code refers to it.
a_like = [anchor.get('href') for anchor in _story_links]
# An anchor without leading text yields '' so both lists stay the same length.
a_text = [anchor.text or '' for anchor in _story_links]
# Map each link's visible text to its URL. zip() pairs the two parallel lists
# by position, so a_text[i] becomes the key for a_like[i]; if a text occurs
# more than once, the last URL wins.
link_text = dict(zip(a_text, a_like))
print(link_text)
# Look up a URL by link text; '没有' ("none") is printed when the key is absent.
print(link_text.get('Elsie','没有'))


# 猜你喜欢

# 转载自 blog.csdn.net/u012593871/article/details/80182213