爬虫入门实例(二)

import requests
from bs4 import BeautifulSoup
# Download a page and walk its parse tree with BeautifulSoup.
# The string below is a placeholder: replace it with a URL you are allowed to
# crawl (the author followed a MOOC course and used
# http://python123.io/ws/demo.html).
# A timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get("此处为可爬取的网址", timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page.
r.raise_for_status()
text = r.text
# Print the raw source of the whole page.
print(text)

soup = BeautifulSoup(text, "html.parser")
# Note: soup.prettify() returns an indented rendering of the parsed markup.

# Every node that follows the first <a> tag at the same tree level.
# `.next_siblings` (plural) is the generator over all of them; the singular
# `.next_sibling` is just one node, and looping over it would walk the
# characters of a text node instead of the siblings.
for sibling in soup.a.next_siblings:
    print(sibling)

# Every node that precedes the first <a> tag at the same tree level.
# A tag with no earlier sibling simply yields nothing here (the singular
# `.previous_sibling` would be None in that case).
for sibling in soup.a.previous_siblings:
    print(sibling)

print("soup.head:")
print(soup.head)
print("soup.title.parent")
print(soup.title.parent)
print(soup.head.contents)  # direct children of <head>
print(len(soup.body.contents))
print("soup.body.contents")
print(soup.body.contents)

# Walk upwards from the first <a> tag through all of its ancestors.
# `.parents` never yields None, so no None check is needed.
for parent in soup.a.parents:
    print(parent.name)

猜你喜欢

转载自blog.csdn.net/dldl1718/article/details/86610164