browser-use库中的DOM元素表示与处理

代码

from dataclasses import dataclass
from typing import TYPE_CHECKING, Optional

from pydantic import BaseModel


@dataclass
class HashedDomElement:
	"""
	Hash of the dom element to be used as a unique identifier.

	Groups three string hashes of one element. The hashes are only stored
	here; how they are computed is not shown in this file.
	"""

	# Hash of the element's branch path (presumably derived from the chain of parent tags; confirm against the hashing code).
	branch_path_hash: str
	# Hash of the element's attributes.
	attributes_hash: str
	# Hash of the element's XPath.
	xpath_hash: str
	# NOTE: a text-content hash field is currently disabled:
	# text_hash: str


class Coordinates(BaseModel):
	# A single 2D point with integer components, validated by Pydantic.
	# Units are presumably pixels; confirm against the code that fills these in.
	x: int  # horizontal component
	y: int  # vertical component


class CoordinateSet(BaseModel):
	# Geometry of a rectangular region: its four corner points, its center
	# point, and its integer width and height.
	# NOTE(review): nothing here enforces that the corners, center and size
	# agree with each other; consistency is up to whoever builds the instance.
	top_left: Coordinates
	top_right: Coordinates
	bottom_left: Coordinates
	bottom_right: Coordinates
	center: Coordinates
	width: int
	height: int


class ViewportInfo(BaseModel):
	# Scroll offsets and size of the viewport, all integers.
	# Units are presumably pixels; confirm against the producer of this data.
	scroll_x: int  # horizontal scroll offset
	scroll_y: int  # vertical scroll offset
	width: int  # viewport width
	height: int  # viewport height


@dataclass
class DOMHistoryElement:
	"""
	Record of one DOM element: identifying data plus optional geometry.

	``tag_name``, ``xpath``, ``highlight_index``, ``entire_parent_branch_path``
	and ``attributes`` are required. ``shadow_root`` defaults to ``False`` and
	the selector / coordinate / viewport fields default to ``None``.
	"""

	tag_name: str
	xpath: str
	highlight_index: Optional[int]
	entire_parent_branch_path: list[str]
	attributes: dict[str, str]
	shadow_root: bool = False
	css_selector: Optional[str] = None
	page_coordinates: Optional[CoordinateSet] = None
	viewport_coordinates: Optional[CoordinateSet] = None
	viewport_info: Optional[ViewportInfo] = None

	def to_dict(self) -> dict:
		"""
		Return the element as a plain dictionary.

		The three model-typed fields are converted with ``model_dump()`` when
		set and emitted as ``None`` otherwise. Every other field is passed
		through unchanged, so the list and dict values are the same objects
		held by this instance, not copies.
		"""

		def dump_or_none(model):
			# Falsy values (notably None) map to None; anything else is dumped.
			return model.model_dump() if model else None

		return {
			'tag_name': self.tag_name,
			'xpath': self.xpath,
			'highlight_index': self.highlight_index,
			'entire_parent_branch_path': self.entire_parent_branch_path,
			'attributes': self.attributes,
			'shadow_root': self.shadow_root,
			'css_selector': self.css_selector,
			'page_coordinates': dump_or_none(self.page_coordinates),
			'viewport_coordinates': dump_or_none(self.viewport_coordinates),
			'viewport_info': dump_or_none(self.viewport_info),
		}

代码解释

  1. HashedDomElement 类

    • 用于唯一标识DOM元素的哈希值集合
    • 包含三种哈希:分支路径哈希、属性哈希和XPath哈希
    • 注释掉的text_hash表明可能计划但尚未实现文本内容哈希
  2. Coordinates 类

    • 简单的坐标点模型
    • 包含x和y坐标值
  3. CoordinateSet 类

    • 表示元素在页面中的位置和尺寸
    • 包含四个角的坐标和中心点坐标
    • 记录元素的宽度和高度
  4. ViewportInfo 类

    • 记录视口信息
    • 包含滚动位置(scroll_x, scroll_y)和视口尺寸(width, height)
  5. DOMHistoryElement 类

    • 最核心的类,用于存储DOM元素的完整信息
    • 包含元素的基本属性:
      • tag_name: 标签名称
      • xpath: 元素的XPath路径
      • highlight_index: 高亮索引(用于UI显示)
      • entire_parent_branch_path: 父元素分支路径
      • attributes: 元素属性字典
    • 包含额外信息:
      • shadow_root: 是否为Shadow DOM根元素
      • css_selector: CSS选择器
      • page_coordinates: 页面坐标(相对于整个文档)
      • viewport_coordinates: 视口坐标(相对于当前可见区域)
      • viewport_info: 视口信息
    • 提供to_dict方法将对象转换为字典格式,便于序列化

这些数据结构主要用于:

  1. 跟踪和记录DOM元素的状态和位置
  2. 在浏览器自动化过程中唯一标识元素
  3. 提供元素的位置信息以支持点击、滚动等交互操作

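代码混用了两种数据类型定义方式:Coordinates、CoordinateSet 和 ViewportInfo 继承自 Pydantic 的 BaseModel,在实例化时会校验字段类型,并提供 model_dump 等序列化方法;HashedDomElement 和 DOMHistoryElement 则使用标准库的 dataclass,更轻量,但不做运行时类型校验。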

示例

if __name__ == "__main__":
    # Demo script: build one instance of each structure defined above and
    # print it, finishing with the JSON form of two DOMHistoryElement objects.
    import json

    # HashedDomElement: three string hashes, printed via the dataclass repr.
    hashed_element = HashedDomElement(
        branch_path_hash="a1b2c3d4",
        attributes_hash="e5f6g7h8",
        xpath_hash="i9j0k1l2",
    )
    print("HashedDomElement 示例:", hashed_element)

    # Coordinates: a single point.
    coord = Coordinates(x=100, y=200)
    print("Coordinates 示例:", coord.model_dump())

    # CoordinateSet: a 100x50 rectangle whose top-left corner is at (10, 10).
    coord_set = CoordinateSet(
        top_left=Coordinates(x=10, y=10),
        top_right=Coordinates(x=110, y=10),
        bottom_left=Coordinates(x=10, y=60),
        bottom_right=Coordinates(x=110, y=60),
        center=Coordinates(x=60, y=35),
        width=100,
        height=50,
    )
    print("CoordinateSet 示例:", coord_set.model_dump())

    # ViewportInfo: a 1920x1080 viewport scrolled down by 100.
    viewport = ViewportInfo(scroll_x=0, scroll_y=100, width=1920, height=1080)
    print("ViewportInfo 示例:", viewport.model_dump())

    # DOMHistoryElement with every optional geometry field populated.
    dom_element = DOMHistoryElement(
        tag_name="div",
        xpath="//div[@id='main']/div[2]",
        highlight_index=5,
        entire_parent_branch_path=["html", "body", "div", "div"],
        attributes={"id": "content", "class": "main-content"},
        css_selector="div#content.main-content",
        page_coordinates=coord_set,
        viewport_coordinates=coord_set,
        viewport_info=viewport,
    )

    # Exercise to_dict and show the result as indented JSON.
    element_dict = dom_element.to_dict()
    print("DOMHistoryElement 转换为字典:")
    print(json.dumps(element_dict, indent=2))

    # Same again with only the required fields; optional ones stay at defaults.
    minimal_element = DOMHistoryElement(
        tag_name="a",
        xpath="//a[@href='https://example.com']",
        highlight_index=None,
        entire_parent_branch_path=["html", "body", "div"],
        attributes={"href": "https://example.com", "class": "link"},
    )
    print("\n最小化 DOMHistoryElement:")
    print(json.dumps(minimal_element.to_dict(), indent=2))
'''
(browser-use) D:\llm\browser-use-test\browser-use>python D:\llm\browser-use-test\browser-use\new-browser-use\test_and_md\dom_view.py
HashedDomElement 示例: HashedDomElement(branch_path_hash='a1b2c3d4', attributes_hash='e5f6g7h8', xpath_hash='i9j0k1l2')
Coordinates 示例: {'x': 100, 'y': 200}
CoordinateSet 示例: {'top_left': {'x': 10, 'y': 10}, 'top_right': {'x': 110, 'y': 10}, 'bottom_left': {'x': 10, 'y': 60}, 'bottom_right': {'x': 110, 'y': 60}, 'center': {'x': 60, 'y': 35}, 'width': 100, 'height': 50}
ViewportInfo 示例: {'scroll_x': 0, 'scroll_y': 100, 'width': 1920, 'height': 1080}
DOMHistoryElement 转换为字典:
{
  "tag_name": "div",
  "xpath": "//div[@id='main']/div[2]",
  "highlight_index": 5,
  "entire_parent_branch_path": [
    "html",
    "body",
    "div",
    "div"
  ],
  "attributes": {
    "id": "content",
    "class": "main-content"
  },
  "shadow_root": false,
  "css_selector": "div#content.main-content",
  "page_coordinates": {
    "top_left": {
      "x": 10,
      "y": 10
    },
    "top_right": {
      "x": 110,
      "y": 10
    },
    "bottom_left": {
      "x": 10,
      "y": 60
    },
    "bottom_right": {
      "x": 110,
      "y": 60
    },
    "center": {
      "x": 60,
      "y": 35
    },
    "width": 100,
    "height": 50
  },
  "viewport_coordinates": {
    "top_left": {
      "x": 10,
      "y": 10
    },
    "top_right": {
      "x": 110,
      "y": 10
    },
    "bottom_left": {
      "x": 10,
      "y": 60
    },
    "bottom_right": {
      "x": 110,
      "y": 60
    },
    "center": {
      "x": 60,
      "y": 35
    },
    "width": 100,
    "height": 50
  },
  "viewport_info": {
    "scroll_x": 0,
    "scroll_y": 100,
    "width": 1920,
    "height": 1080
  }
}

最小化 DOMHistoryElement:
{
  "tag_name": "a",
  "xpath": "//a[@href='https://example.com']",
  "highlight_index": null,
  "entire_parent_branch_path": [
    "html",
    "body",
    "div"
  ],
  "attributes": {
    "href": "https://example.com",
    "class": "link"
  },
  "shadow_root": false,
  "css_selector": null,
  "page_coordinates": null,
  "viewport_coordinates": null,
  "viewport_info": null
}