用python+graphviz/networkx画目录结构树状图

想着用python绘制某目录的树状图，一开始想到了用graphviz，因为去年离职的时候整理文档，用graphviz画过代码调用结构图。graphviz有一门自己的语言DOT，DOT很简单，无非就是加点、加边、设置属性这点东西，而且有python接口。

我在ubuntu下，先要安装graphviz软件，官网有deb包，然后python安装pygraphviz模块。

目标功能是输入一个路径，输出该路径下的目录结构以及文件和文件夹的大小

# -*- coding:utf-8 -*-
import pygraphviz as pgv
import os
from os.path import getsize, join
import sys
import Queue

# Python 2-only workaround: reload(sys) makes sys.setdefaultencoding reachable
# again so the default codec for implicit str/unicode conversion can be set to UTF-8.
# NOTE(review): presumably needed so non-ASCII file names can be concatenated
# into node labels -- confirm before porting to Python 3, where neither call exists.
reload(sys)
sys.setdefaultencoding('utf8')


class DrawDirectorTree(object):
    """
    Draw a directory structure as a tree with graphviz (pygraphviz).

    Every file and directory below the target path becomes one node whose
    label is the path, a newline, and the size in bytes. Edges point from a
    directory to each of its direct children.
    """

    @classmethod
    def getdirsize(cls, dir):
        """
        Return the total size in bytes of all files below a directory.

        :param dir: directory to measure; it is walked recursively with os.walk
        :return: sum of the sizes of every file whose size could be read
        """
        size = 0
        for root, dirs, files in os.walk(dir):
            for name in files:
                try:
                    size += getsize(join(root, name))
                except OSError:
                    # Dangling symlinks and files deleted mid-walk have no
                    # readable size; skip them instead of aborting the walk.
                    continue
        return size

    @classmethod
    def _node_label(cls, path):
        """Build the node label for a path: the path, a newline, its size."""
        # A directory's own stat size only describes the directory entry,
        # so directories are labelled with the size of their contents.
        if os.path.isdir(path):
            size = cls.getdirsize(path)
        else:
            size = getsize(path)
        return path + "\n" + str(size)

    @classmethod
    def draw_director_tree(cls, input_path, dot_path='director_tree.dot',
                           png_path='director_tree.png'):
        """
        Traverse a directory breadth-first and render it as a tree image.

        The DOT source is printed to standard output and written to
        ``dot_path``; the picture laid out by ``dot`` is written to
        ``png_path``.

        :param input_path: target directory
        :param dot_path: file the DOT source is written to
        :param png_path: file the rendered image is written to
        :return: None; if input_path is not an existing directory an error
                 message is printed and nothing is drawn
        """
        # Local import keeps this class independent of the file's import block.
        from collections import deque

        # isdir() is already False for paths that do not exist.
        if not os.path.isdir(input_path):
            print("Input_path Error!")
            return None

        # Initialise the graph.
        tree_graph = pgv.AGraph(directed=True, strict=True)
        tree_graph.node_attr['style'] = 'filled'
        tree_graph.node_attr['shape'] = 'square'

        root_label = cls._node_label(input_path)
        tree_graph.add_node(root_label)

        # Breadth-first traversal. Each queue entry carries the label that was
        # already computed for the directory, so it is never recomputed per child.
        pending = deque([(input_path, root_label)])
        visited = set()
        while pending:
            parent, parent_label = pending.popleft()

            # A symlink pointing back to an ancestor would otherwise make the
            # traversal loop forever; every real directory is expanded once.
            real_parent = os.path.realpath(parent)
            if real_parent in visited:
                continue
            visited.add(real_parent)

            try:
                children = os.listdir(parent)
            except OSError:
                # Unreadable directory: keep its node, skip its contents.
                continue

            for child in children:
                full_child = join(parent, child)
                child_is_dir = os.path.isdir(full_child)
                if not (child_is_dir or os.path.isfile(full_child)):
                    # Neither file nor directory (e.g. dangling symlink).
                    continue
                child_label = cls._node_label(full_child)
                tree_graph.add_node(child_label)
                tree_graph.add_edge(parent_label, child_label)
                if child_is_dir:
                    pending.append((full_child, child_label))

        tree_graph.graph_attr['epsilon'] = '0.001'
        print(tree_graph.string())  # print dot file to standard output
        tree_graph.write(dot_path)
        tree_graph.layout('dot')  # layout with dot
        tree_graph.draw(png_path)  # write to file


# Example run against a hard-coded directory. This is a module-level call, so it
# executes whenever the file is run or imported.
DrawDirectorTree.draw_director_tree('/home/aron/workspace/python_space/')

运行后会生成一个dot文件（即有向图的结构描述文件），同时还会把图保存为一张png图片。

事实上效果不太好，小的目录可以搞定，大的目录就耗时太长了，主要是计算文件夹尺寸本身就很耗时，而且无法生成巨大的结构图。

之后发现networkx模块的使用方式几乎与graphviz一样，add_node、add_edge，就写了一版networkx的。

# -*- coding:utf-8 -*-
import os
from os.path import getsize, join
import sys
import Queue
import networkx as nx
import matplotlib.pyplot as plt
# Python 2-only workaround: reload(sys) makes sys.setdefaultencoding reachable
# again so the default codec for implicit str/unicode conversion can be set to UTF-8.
# NOTE(review): presumably needed so non-ASCII file names can be concatenated
# into node labels -- confirm before porting to Python 3, where neither call exists.
reload(sys)
sys.setdefaultencoding('utf8')


class DirectorTreeNetworkx(object):
    """
    Draw a directory structure as a directed graph with networkx.

    Every file and directory below the target path becomes one node whose
    name is the path, a newline, and the size in bytes. Edges point from a
    directory to each of its direct children.
    """

    @classmethod
    def getdirsize(cls, dir):
        """
        Return the total size in bytes of all files below a directory.

        :param dir: directory to measure; it is walked recursively with os.walk
        :return: sum of the sizes of every file whose size could be read
        """
        size = 0
        for root, dirs, files in os.walk(dir):
            for name in files:
                try:
                    size += getsize(join(root, name))
                except OSError:
                    # Dangling symlinks and files deleted mid-walk have no
                    # readable size; skip them instead of aborting the walk.
                    continue
        return size

    @classmethod
    def _node_label(cls, path):
        """Build the node name for a path: the path, a newline, its size."""
        # A directory's own stat size only describes the directory entry,
        # so directories are labelled with the size of their contents.
        if os.path.isdir(path):
            size = cls.getdirsize(path)
        else:
            size = getsize(path)
        return path + "\n" + str(size)

    @classmethod
    def draw_director_tree(cls, input_path, with_labels=False):
        """
        Traverse a directory breadth-first and show it in a matplotlib window.

        :param input_path: target directory
        :param with_labels: draw the node names (path and size) on the figure;
                            off by default, which matches a bare nx.draw call
        :return: None; if input_path is not an existing directory an error
                 message is printed and nothing is drawn
        """
        # Local import keeps this class independent of the file's import block.
        from collections import deque

        # isdir() is already False for paths that do not exist.
        if not os.path.isdir(input_path):
            print("Input_path Error!")
            return None

        # Initialise the graph.
        tree_graph = nx.DiGraph()

        root_label = cls._node_label(input_path)
        tree_graph.add_node(root_label)

        # Breadth-first traversal. Each queue entry carries the label that was
        # already computed for the directory, so it is never recomputed per child.
        pending = deque([(input_path, root_label)])
        visited = set()
        while pending:
            parent, parent_label = pending.popleft()

            # A symlink pointing back to an ancestor would otherwise make the
            # traversal loop forever; every real directory is expanded once.
            real_parent = os.path.realpath(parent)
            if real_parent in visited:
                continue
            visited.add(real_parent)

            try:
                children = os.listdir(parent)
            except OSError:
                # Unreadable directory: keep its node, skip its contents.
                continue

            for child in children:
                full_child = join(parent, child)
                child_is_dir = os.path.isdir(full_child)
                if not (child_is_dir or os.path.isfile(full_child)):
                    # Neither file nor directory (e.g. dangling symlink).
                    continue
                child_label = cls._node_label(full_child)
                tree_graph.add_node(child_label)
                tree_graph.add_edge(parent_label, child_label)
                if child_is_dir:
                    pending.append((full_child, child_label))

        nx.draw(tree_graph, with_labels=with_labels)
        plt.show()

这一版能够处理稍大一点的目录，主要是生成网络图片比graphviz快一点，也能展示稍微复杂一点的网络结构。

这个也就玩一玩，真正做目录结构展示还得想别的办法，同时不能期望在获取结构的同时计算文件夹大小。

上面的代码计算文件夹大小的策略也有点问题，存在重复计算，其实算一次就够了。同时还可以先构建完整个结构，再自底向上地计算目录大小，这样就避免了每处理一层目录都要walk一遍求和，下层文件的大小只需计算一次。

用python+graphviz/networkx画目录结构树状图

猜你喜欢