Label spreading and propagation on graphs.

Graphs
How diffusion of labels helps to classify and detect communities.

Label propagation is a semi-supervised machine learning algorithm used for classification and community detection tasks on graphs. It works by propagating labels through the graph based on the structure and connectivity of the nodes.

Label propagation is particularly useful in scenarios where labeled data is scarce but the graph structure is informative. It leverages the assumption that connected nodes are likely to share the same label.

Note that Neo4j offers the label propagation algorithm via GDS. Other vendors, like Memgraph, prefer the LabelRankT variation, which has the advantage that it can be executed via a trigger. This triggered detection makes it very useful for streaming data and real-time anomaly detection.
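
If you only need unsupervised community detection (no seed labels at all), NetworkX itself ships a label-propagation routine. A minimal sketch of that variant, assuming a reasonably recent NetworkX version:

import networkx as nx
from networkx.algorithms.community import label_propagation_communities

G = nx.karate_club_graph()

# Every node starts in its own community and repeatedly adopts the
# majority label among its neighbours until the assignment stabilizes.
communities = list(label_propagation_communities(G))
for i, community in enumerate(communities):
    print(f"Community {i}: {sorted(community)}")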

Simplistic

The most basic version of label propagation could look like this:

import networkx as nx
import numpy as np

def label_propagation(G, labels, max_iter=100):
    """Repeatedly assign to each unlabeled node the majority label of its labeled neighbors."""
    nodes = list(G.nodes())
    for _ in range(max_iter):
        new_labels = labels.copy()
        for node in nodes:
            if labels[node] is None:
                # Collect the labels of the already-labeled neighbors
                neighbor_labels = [labels[neighbor] for neighbor in G.neighbors(node) if labels[neighbor] is not None]
                if neighbor_labels:
                    # Adopt the most common label among them
                    new_labels[node] = max(set(neighbor_labels), key=neighbor_labels.count)
        # Stop as soon as a full pass changes nothing
        if new_labels == labels:
            break
        labels = new_labels
    return labels

If we apply this to Zachary's karate club graph and assign None to all nodes except nodes 0 and 33:

G = nx.karate_club_graph()
initial_labels = {node: None for node in G.nodes()}
initial_labels[0] = 0  # Label node 0 as class 0
initial_labels[33] = 1  # Label node 33 as class 1

final_labels = label_propagation(G, initial_labels)
print(final_labels)
{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 1, 10: 0, 11: 0, 12: 0, 13: 0, 14: 1, 15: 1, 16: 0, 17: 0, 18: 1, 19: 0, 20: 1, 21: 0, 22: 1, 23: 1, 24: 0, 25: 0, 26: 1, 27: 1, 28: 1, 29: 1, 30: 1, 31: 0, 32: 1, 33: 1}

This can be visualized for instance with PyVis:

from pyvis import network as net
g=net.Network(notebook=True, cdn_resources='in_line')
g.from_nx(G)
def set_options(net_g):
    options = """
var options = {
   "configure": {
        "enabled": true
   },
  "edges": {
    "color": {
      "inherit": false
    },
    "smooth": true
  },
  "physics": {
    "barnesHut": {
      "gravitationalConstant": -3000.0
    },
    "minVelocity": 0.75
  }
}
"""
    net_g.set_options(options) 


set_options(g)
g.show("simple.html")
(Interactive rendering: simple.html)

By enlarging the two seed nodes and coloring every node according to its final label we get:

g.nodes[0]["color"]="#00ff00"
g.nodes[0]["size"]=20
g.nodes[33]["color"]="#ff0000"
g.nodes[33]["size"]=20
for i in final_labels:
    if final_labels[i]==1:
        g.nodes[i]["color"]="#ff0000"
    else:
        g.nodes[i]["color"]="#00ff00"
    g.nodes[i]["title"]=str(i)
g.show("example.html")  
(Interactive rendering: example.html)

You can see that this simplistic propagation algorithm isn't very good; there are some nodes which seem to be isolated from the class of their neighborhood.

Label propagation

A more rigorous approach can be found in Xiaojin Zhu and Zoubin Ghahramani, "Learning from labeled and unlabeled data with label propagation", Technical Report CMU-CALD-02-107, Carnegie Mellon University, 2002. The algorithm there can be summed up as:

\[
\begin{array}{l}
\mathbf{W}\text{: adjacency matrix of the graph} \\
\text{Compute the diagonal degree matrix } \mathbf{D} \text{ by } \mathbf{D}_{ii} \leftarrow \sum_{j} W_{ij} \\
\text{Initialize } \hat{Y}^{(0)} \leftarrow \left(y_{1}, \ldots, y_{l}, 0, 0, \ldots, 0\right) \\
\text{Iterate} \\
\quad 1.\ \hat{Y}^{(t+1)} \leftarrow \mathbf{D}^{-1} \mathbf{W} \hat{Y}^{(t)} \\
\quad 2.\ \hat{Y}_{l}^{(t+1)} \leftarrow Y_{l} \\
\text{until convergence to } \hat{Y}^{(\infty)} \\
\text{Label point } x_{i} \text{ by the sign of } \hat{y}_{i}^{(\infty)}
\end{array}
\]

Here the graph starts out with a few labelled nodes and the propagation assigns labels to the remaining ones via diffusion.
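
Before wrapping this in classes, the iteration itself fits in a handful of NumPy lines. A minimal sketch of the update above for the two-class karate case, using a ±1 label encoding for illustration (the class-based version below uses one-hot rows instead):

import numpy as np
import networkx as nx

G = nx.karate_club_graph()
W = nx.adjacency_matrix(G).toarray().astype(float)
D_inv = np.diag(1.0 / W.sum(axis=1))  # D^-1

# Two seed labels encoded as +1 / -1; 0 means unlabeled
Y = np.zeros(len(G))
Y[0], Y[33] = 1.0, -1.0
labeled = Y != 0

Y_hat = Y.copy()
for _ in range(1000):
    Y_prev = Y_hat.copy()
    Y_hat = D_inv @ W @ Y_hat      # step 1: diffuse along the edges
    Y_hat[labeled] = Y[labeled]    # step 2: clamp the known labels
    if np.abs(Y_hat - Y_prev).sum() < 1e-3:
        break

print(np.sign(Y_hat))  # label each node by the sign of the fixed point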

Below you will also find a label spreading algorithm, and we'll base both algorithms on the same base class:

from abc import ABC, abstractmethod
import torch

class BaseLabelPropagation(ABC):
    def __init__(self, adj_matrix):
        self.norm_adj_matrix = self._normalize(adj_matrix)
        self.n_nodes = adj_matrix.size(0)
        self.one_hot_labels = None 
        self.n_classes = None
        self.labeled_mask = None
        self.predictions = None

    @staticmethod
    @abstractmethod
    def _normalize(adj_matrix):
        raise NotImplementedError("_normalize must be implemented")

    @abstractmethod
    def _propagate(self):
        raise NotImplementedError("_propagate must be implemented")

    def _one_hot_encode(self, labels):
        # Get the number of classes
        classes = torch.unique(labels)
        classes = classes[classes != -1]
        self.n_classes = classes.size(0)

        # One-hot encode labeled data instances and zero rows corresponding to unlabeled instances
        unlabeled_mask = (labels == -1)
        labels = labels.clone()  # defensive copying
        labels[unlabeled_mask] = 0
        self.one_hot_labels = torch.zeros((self.n_nodes, self.n_classes), dtype=torch.float)
        self.one_hot_labels = self.one_hot_labels.scatter(1, labels.unsqueeze(1), 1)
        self.one_hot_labels[unlabeled_mask, 0] = 0

        self.labeled_mask = ~unlabeled_mask

    def fit(self, labels, max_iter, tol):
        """Fits a semi-supervised learning label propagation model.
        
        labels: torch.LongTensor
            Tensor of size n_nodes indicating the class number of each node.
            Unlabeled nodes are denoted with -1.
        max_iter: int
            Maximum number of iterations allowed.
        tol: float
            Convergence tolerance: threshold to consider the system at steady state.
        """
        self._one_hot_encode(labels)

        self.predictions = self.one_hot_labels.clone()
        prev_predictions = torch.zeros((self.n_nodes, self.n_classes), dtype=torch.float)

        for i in range(max_iter):
            # Stop iterations if the system is considered at a steady state
            variation = torch.abs(self.predictions - prev_predictions).sum().item()
            
            if variation < tol:
                print(f"The method stopped after {i} iterations, variation={variation:.4f}.")
                break

            prev_predictions = self.predictions
            self._propagate()

    def predict(self):
        return self.predictions

    def predict_classes(self):
        return self.predictions.max(dim=1).indices

The Zhu and Ghahramani algorithm is now:

class LabelPropagation(BaseLabelPropagation):
    def __init__(self, adj_matrix):
        super().__init__(adj_matrix)

    @staticmethod
    def _normalize(adj_matrix):
        """Computes D^-1 * W"""
        degs = adj_matrix.sum(dim=1)
        degs[degs == 0] = 1  # avoid division by zero for isolated nodes
        return adj_matrix / degs[:, None]

    def _propagate(self):
        self.predictions = torch.matmul(self.norm_adj_matrix, self.predictions)

        # Put back already known labels
        self.predictions[self.labeled_mask] = self.one_hot_labels[self.labeled_mask]

    def fit(self, labels, max_iter=1000, tol=1e-3):
        super().fit(labels, max_iter, tol)

Label spreading

The Label Spreading algorithm from Dengyong Zhou, Olivier Bousquet, Thomas Navin Lal, Jason Weston, Bernhard Schoelkopf. Learning with local and global consistency (2004) is as follows:

\[
\begin{array}{l}
\mathbf{W}\text{: adjacency matrix of the graph} \\
\text{Compute the diagonal degree matrix } \mathbf{D} \text{ by } \mathbf{D}_{ii} \leftarrow \sum_{j} W_{ij} \\
\text{Compute the normalized graph Laplacian } \mathcal{L} \leftarrow \mathbf{D}^{-1/2} \mathbf{W} \mathbf{D}^{-1/2} \\
\text{Initialize } \hat{Y}^{(0)} \leftarrow \left(y_{1}, \ldots, y_{l}, 0, 0, \ldots, 0\right) \\
\text{Choose a parameter } \alpha \in [0, 1) \\
\text{Iterate } \hat{Y}^{(t+1)} \leftarrow \alpha \mathcal{L} \hat{Y}^{(t)} + (1-\alpha) \hat{Y}^{(0)} \text{ until convergence to } \hat{Y}^{(\infty)} \\
\text{Label point } x_{i} \text{ by the sign of } \hat{y}_{i}^{(\infty)}
\end{array}
\]

Note that the symmetrically normalized matrix \(\mathcal{L} = \mathbf{D}^{-1/2} \mathbf{W} \mathbf{D}^{-1/2}\) is used here rather than the row-normalized \(\mathbf{D}^{-1} \mathbf{W}\) of the previous algorithm. This symmetric normalization mimics the continuous diffusion equation more closely.
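
As shown in the same paper, this damped iteration has a closed-form fixed point, so the loop could in principle be replaced by a single linear solve:

\[ \hat{Y}^{(\infty)} = (1-\alpha)\left(\mathbf{I} - \alpha \mathcal{L}\right)^{-1} \hat{Y}^{(0)} \]

On large graphs the iterative version is usually preferred, since solving an \(n \times n\) linear system quickly becomes expensive.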

class LabelSpreading(BaseLabelPropagation):
    def __init__(self, adj_matrix):
        super().__init__(adj_matrix)
        self.alpha = None

    @staticmethod
    def _normalize(adj_matrix):
        """Computes D^-1/2 * W * D^-1/2"""
        degs = adj_matrix.sum(dim=1)
        norm = torch.pow(degs, -0.5)
        norm[torch.isinf(norm)] = 1
        return adj_matrix * norm[:, None] * norm[None, :]

    def _propagate(self):
        self.predictions = (
            self.alpha * torch.matmul(self.norm_adj_matrix, self.predictions)
            + (1 - self.alpha) * self.one_hot_labels
        )
    
    def fit(self, labels, max_iter=3000, tol=1e-5, alpha=0.5):
        """
        Parameters
        ----------
        alpha: float
            Clamping factor.
        """
        self.alpha = alpha
        super().fit(labels, max_iter, tol)

Testing models on synthetic data

Let’s apply these two propagations to our karate graph:

G = nx.karate_club_graph()
adj_matrix = nx.adjacency_matrix(G).toarray()
# Create labels
labels = np.full(len(G.nodes), -1.)
labels[0]=0
labels[33]=1

adj_matrix_t = torch.FloatTensor(adj_matrix)
labels_t = torch.LongTensor(labels)

# Learn with Label Propagation
label_propagation = LabelPropagation(adj_matrix_t)
print("Label Propagation: ", end="")
label_propagation.fit(labels_t)
label_propagation_output_labels = label_propagation.predict_classes()

# Learn with Label Spreading
label_spreading = LabelSpreading(adj_matrix_t)
print("Label Spreading: ", end="")
label_spreading.fit(labels_t, alpha=0.8)
label_spreading_output_labels = label_spreading.predict_classes()
Label Propagation: The method stopped after 39 iterations, variation=0.0008.
Label Spreading: The method stopped after 31 iterations, variation=0.0000.
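
Since NetworkX's karate club graph records which faction each member actually joined (the 'club' node attribute), we can check how well the two seeds recover the real split. A quick sketch, assuming class 0 stands for node 0's faction ('Mr. Hi') and class 1 for node 33's ('Officer'):

# Ground truth from the node attributes shipped with the graph
true_labels = np.array(
    [0 if G.nodes[i]["club"] == "Mr. Hi" else 1 for i in G.nodes()]
)
prop_acc = (label_propagation_output_labels.numpy() == true_labels).mean()
spread_acc = (label_spreading_output_labels.numpy() == true_labels).mean()
print(f"Label propagation accuracy: {prop_acc:.2f}")
print(f"Label spreading accuracy:   {spread_acc:.2f}")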

The label propagation can be seen via this rendering:

prop_labels = label_propagation_output_labels.numpy()
g=net.Network(notebook=True, cdn_resources='in_line')
g.from_nx(G)
set_options(g)
g.nodes[0]["color"]="#00ff00"
g.nodes[0]["size"]=20
g.nodes[33]["color"]="#ff0000"
g.nodes[33]["size"]=20
for i,v in enumerate(prop_labels):
    if prop_labels[i]==1:
        g.nodes[i]["color"]="#ff0000"
    else:
        g.nodes[i]["color"]="#00ff00"
    g.nodes[i]["title"]=str(i)
g.show("propagation.html") 
(Interactive rendering: propagation.html)

The label spreading gives:

spread_labels = label_spreading_output_labels.numpy()
g=net.Network(notebook=True, cdn_resources='in_line')
g.from_nx(G)
set_options(g)
g.nodes[0]["color"]="#00ff00"
g.nodes[0]["size"]=20
g.nodes[33]["color"]="#ff0000"
g.nodes[33]["size"]=20
for i,v in enumerate(spread_labels):
    if spread_labels[i]==1:
        g.nodes[i]["color"]="#ff0000"
    else:
        g.nodes[i]["color"]="#00ff00"
    g.nodes[i]["title"]=str(i)
g.show("spreading.html") 
(Interactive rendering: spreading.html)

We can also apply these to a connected caveman graph, where the propagation should reflect the community topology:

import numpy as np
import networkx as nx


# Create caveman graph
n_cliques = 4
size_cliques = 10
caveman_graph = nx.connected_caveman_graph(n_cliques, size_cliques)
adj_matrix = nx.adjacency_matrix(caveman_graph).toarray()

# Create labels
labels = np.full(n_cliques * size_cliques, -1.)

# Only one node per clique is labeled. Each clique belongs to a different class.
labels[0] = 0
labels[size_cliques] = 1
labels[size_cliques * 2] = 2
labels[size_cliques * 3] = 3

# Create input tensors
adj_matrix_t = torch.FloatTensor(adj_matrix)
labels_t = torch.LongTensor(labels)

# Learn with Label Propagation
label_propagation = LabelPropagation(adj_matrix_t)
print("Label Propagation: ", end="")
label_propagation.fit(labels_t)
label_propagation_output_labels = label_propagation.predict_classes()

# Learn with Label Spreading
label_spreading = LabelSpreading(adj_matrix_t)
print("Label Spreading: ", end="")
label_spreading.fit(labels_t, alpha=0.8)
label_spreading_output_labels = label_spreading.predict_classes()
Label Propagation: The method stopped after 73 iterations, variation=0.0010.
Label Spreading: The method stopped after 39 iterations, variation=0.0000.
spread_labels = label_spreading_output_labels.numpy()
g=net.Network(notebook=True, cdn_resources='in_line')
g.from_nx(caveman_graph)
set_options(g)
g.nodes[0]["color"]="#00ff00"
g.nodes[0]["size"]=20
g.nodes[33]["color"]="#ff0000"
g.nodes[33]["size"]=20
color_map=["#ff0000","#00ff00","#0000ff","#ff00ff"]
for i,v in enumerate(spread_labels):
    g.nodes[i]["color"]=color_map[spread_labels[i]]
    g.nodes[i]["title"]=str(i)
g.show("caveman.html") 
(Interactive rendering: caveman.html)

Aside from the three anomalies, this indeed gives a good result.
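
To put a number on those anomalies we can count the nodes whose predicted class differs from their own clique, relying on the same assumption as the seeding above, namely that connected_caveman_graph numbers each clique's nodes consecutively:

# Node i belongs to clique i // size_cliques, which is also its seeded class
true_cliques = np.arange(n_cliques * size_cliques) // size_cliques
mismatches = int((label_spreading_output_labels.numpy() != true_cliques).sum())
print(f"{mismatches} nodes were assigned to a different clique's class")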