Python implementation of Tarjan's algorithm

This article presents Tarjan's algorithm, which finds the strongly connected components of a directed graph in linear time, together with a Python implementation.


Related concepts


Directed graph example

Strongly connected: two nodes of a directed graph are strongly connected if each can reach the other
Strongly connected graph: a directed graph in which any two nodes are strongly connected
Strongly connected component (SCC): a maximal strongly connected subgraph of a directed graph

Low-link value (LLV): during the depth-first search (DFS), the smallest serial number among the nodes that a node can reach, including the node itself

Algorithm process


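  • Start of the depth-first search: visit an unvisited node, assign it the next serial number (a counter that increases by one for each newly visited node), initialise its LLV to that serial number, mark the node as visited, and push it onto the stack;
  • Return from an adjacent node: once the search of an adjacent node has finished (or the adjacent node had already been visited), if that adjacent node is still on the stack, update the LLV of the current node to the smaller of its own LLV and the adjacent node's LLV;
  • End of the adjacent-node loop: if the current node is the start node of a strongly connected component (SCC), i.e. its LLV equals its own serial number, pop nodes off the stack until the current node has been popped; the popped nodes form one SCC.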

Note:
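Once all of a node's outgoing edges have been explored and the node has been popped from the stack, edges that lead into it from nodes visited later are ignored, because the node is no longer on the stack. This ensures that nodes connected in only one direction do not end up in the same strongly connected component.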

calculation example


Calculation example 1 is the same as the previous example:
Calculation example 1

Calculation example 2:
Calculation example 2

Code


node.py

from typing import List


class Node:
    def __init__(self, id: int, parents: List[int], descendants: List[int]) -> None:
        """
        Create a graph node and record its direct neighbours.

        :param id:  identifier of this node
        :param parents:  IDs of the nodes that have an edge pointing directly at this node
        :param descendants:  IDs of the nodes this node points at directly
        """

        # The lists are stored as given (no copy is made).
        self.id, self.parents, self.descendants = id, parents, descendants


algorithm.py

from typing import Dict

from node import Node


class Tarjan(object):
    """
    Tarjan's algorithm

    Finds the strongly connected components (SCCs) of the directed graph described by ``nodes``.
    The search runs from the constructor; afterwards ``list_scc`` holds one list of node IDs per SCC
    and ``num_scc`` holds the number of SCCs.
    """
    def __init__(self, nodes: Dict[int, "Node"]) -> None:
        """
        data initialise

        :param nodes:  node dictionary (node ID -> node); each node's ``descendants`` lists the IDs of
                       the nodes it points at, and every listed ID must be a key of this dictionary
        """

        self.nodes = nodes

        # intermediate data
        self.unvisited_flag = -1
        self.serial = 0  # serial number given to the next newly visited node
        self.num_scc = 0  # number of SCCs found so far
        self.serials = {i: self.unvisited_flag for i in nodes}  # each node's serial number
        self.low = {i: 0 for i in nodes}  # each node's low-link value (overwritten on first visit)
        self.stack = []  # node stack
        self.on_stack = {i: False for i in nodes}  # if each node on stack

        # run algorithm
        self.list_scc = []  # final result
        self._find_scc()

    def _find_scc(self):
        """
        algorithm main function

        Starts a depth-first search from every node that is still unvisited, then groups the nodes
        into ``list_scc``.
        """

        for i in self.nodes:
            self.serials[i] = self.unvisited_flag

        for i in self.nodes:
            if self.serials[i] == self.unvisited_flag:
                self._dfs(node_id_at=i)

        # result process: when an SCC is closed, the low-link value of every member is set to the
        # serial number of the SCC's start node, so nodes sharing a low-link value form one SCC
        dict_scc = {}
        for i, low in self.low.items():
            dict_scc.setdefault(low, []).append(i)
        self.list_scc = list(dict_scc.values())

    def _discover(self, node_id: int):
        """
        first visit of a node: push it on the stack and give it the next serial number

        :param node_id:  ID of the node being visited for the first time
        """

        self.stack.append(node_id)
        self.on_stack[node_id] = True
        self.serials[node_id] = self.low[node_id] = self.serial
        self.serial += 1

    def _dfs(self, node_id_at: int):
        """
        algorithm search function

        Depth-first search from one node. The search keeps its own list of partially explored nodes
        instead of calling itself, so a long path in the graph cannot exceed Python's recursion limit.

        :param node_id_at:  ID of the node to start from
        """

        self._discover(node_id_at)
        # each entry: (node ID, iterator over the neighbours of that node not yet looked at)
        pending = [(node_id_at, iter(self.nodes[node_id_at].descendants))]

        while pending:
            node_id_cur, neighbours = pending[-1]

            for node_id_to in neighbours:
                if self.serials[node_id_to] == self.unvisited_flag:
                    # explore the unvisited neighbour first; the current node is resumed afterwards
                    self._discover(node_id_to)
                    pending.append((node_id_to, iter(self.nodes[node_id_to].descendants)))
                    break

                # minimise the low-link number
                if self.on_stack[node_id_to]:
                    self.low[node_id_cur] = min(self.low[node_id_cur], self.low[node_id_to])
            else:
                # all neighbours of the current node have been handled
                pending.pop()

                # if the current node is the start node of an SCC, empty the stack back to it
                if self.serials[node_id_cur] == self.low[node_id_cur]:
                    while True:
                        node_id = self.stack.pop()
                        self.on_stack[node_id] = False
                        self.low[node_id] = self.serials[node_id_cur]
                        if node_id == node_id_cur:
                            break

                    self.num_scc += 1

                # hand the low-link number to the node the search came from, but only when the
                # current node is still on the stack (i.e. it did not just close an SCC)
                if pending and self.on_stack[node_id_cur]:
                    node_id_from = pending[-1][0]
                    self.low[node_id_from] = min(self.low[node_id_from], self.low[node_id_cur])


main.py

from node import Node
from algorithm import Tarjan


# input graph as an adjacency matrix:
# connections[src][dst] is non-zero when there is an edge src -> dst
# example 1
num_node = 8
connections = [
    [0, 1, 0, 0, 0, 0, 0, 0],
    [0, 0, 1, 0, 0, 0, 0, 0],
    [1, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 1, 0, 0, 1],
    [0, 0, 0, 0, 0, 1, 0, 0],
    [1, 0, 0, 0, 0, 0, 1, 0],
    [1, 0, 1, 0, 1, 0, 0, 0],
    [0, 0, 0, 1, 0, 1, 0, 0],
]
# # example 2 (uncomment to replace example 1)
# num_node = 6
# connections = [
#     [0, 1, 1, 0, 0, 0],
#     [0, 0, 0, 1, 0, 0],
#     [0, 0, 0, 1, 1, 0],
#     [1, 0, 0, 0, 0, 1],
#     [0, 0, 0, 0, 0, 1],
#     [0, 0, 0, 0, 0, 0],
# ]

# one Node per matrix index: column `index` gives its parents, row `index` its descendants
nodes = {
    index: Node(
        id=index,
        parents=[src for src in range(num_node) if connections[src][index]],
        descendants=[dst for dst in range(num_node) if connections[index][dst]],
    )
    for index in range(num_node)
}

# run the algorithm and print one component per line
tarjan = Tarjan(nodes=nodes)
print()
print("strongly connected components:")
for scc in tarjan.list_scc:
    print(scc)
print()


operation result


Calculation example 1:
Run result 1

Calculation example 2:
Running result 2

Guess you like

Origin blog.csdn.net/Zhang_0702_China/article/details/130114031