# !pip install faker
import networkx as nx
import matplotlib.pyplot as plt
from faker import Faker
= Faker()
faker %matplotlib inline
NetworkX Overview
NetworkX is a comprehensive Python library designed for the creation, manipulation, and study of complex networks and graphs. It provides tools to work with both simple and complex graph structures, including directed and undirected graphs, multigraphs, and more. NetworkX supports a wide range of graph algorithms, such as shortest path, clustering, and network flow, making it suitable for various applications in social network analysis, biology, computer science, and more. The library is highly flexible and integrates well with other scientific computing libraries like NumPy and SciPy, allowing for efficient computation and analysis of large-scale networks.
Setup
Creating graphs
Make sure that you understand the scope of a particular graph instance. The graph algorithms in NetworkX usually apply only to a few types of graphs. There are, however, methods which convert between graph types. E.g., to convert a directed graph to an undirected one you would use something like this:
# Build a small directed cycle and derive its undirected counterpart.
G_directed = nx.DiGraph()
G_directed.add_edges_from([(1, 2), (2, 3), (3, 1)])
G_undirected = G_directed.to_undirected()
Instantiating a graph is as simple as:
# default (undirected) graph
G = nx.Graph()
# an empty graph: 100 nodes, no edges
EG = nx.empty_graph(100)
# a directed graph
DG = nx.DiGraph()
# a multi-directed graph
MDG = nx.MultiDiGraph()
# a complete graph
CG = nx.complete_graph(10)
# a path graph
PG = nx.path_graph(5)
# a complete bipartite graph
CBG = nx.complete_bipartite_graph(5, 3)
# a grid graph (one list entry per dimension)
GG = nx.grid_graph([2, 3, 5, 2])
Analysis
Creating and visualizing a basic graph requires more than one would expect:
import random

# Directed graph whose nodes carry a label inside a `data` payload.
gr = nx.DiGraph()
gr.add_node(1, data={'label': 'Space'})
gr.add_node(2, data={'label': 'Time'})
gr.add_node(3, data={'label': 'Gravitation'})
gr.add_node(4, data={'label': 'Geometry'})
gr.add_node(5, data={'label': 'SU(2)'})
gr.add_node(6, data={'label': 'Spin'})
gr.add_node(7, data={'label': 'GL(n)'})

edge_array = [(1, 2), (2, 3), (3, 1), (3, 4), (2, 5), (5, 6), (1, 7)]
gr.add_edges_from(edge_array)
for e in edge_array:
    # Two ways of attaching data to an edge: a nested payload via
    # set_edge_attributes, and a plain `weight` attribute via add_edge
    # (adding an existing edge again only updates its attributes).
    nx.set_edge_attributes(gr, {e: {'data': {'weight': round(random.random(), 2)}}})
    gr.add_edge(*e, weight=round(random.random(), 2))

# Node id -> label, and edge -> weight, used as drawing labels.
labelDic = {n: gr.nodes[n]["data"]["label"] for n in gr.nodes()}
# Read the weights from `gr` itself; iterating another graph's edges
# would leave the edge labels empty.
edgeDic = {e: gr.edges[e]["weight"] for e in gr.edges}

kpos = nx.layout.kamada_kawai_layout(gr)
nx.draw_networkx(gr, kpos, labels=labelDic, with_labels=True, arrowsize=25)
# Assigning the result keeps the notebook from echoing the returned label dict.
o = nx.draw_networkx_edge_labels(gr, kpos, edge_labels=edgeDic, label_pos=0.4)
If you want to turn the graph into a NumPy matrix:
# Adjacency matrix of `gr`, converted from sparse to dense form for display.
nx.adjacency_matrix(gr).todense()
matrix([[0. , 0.18, 0. , 0. , 0. , 0. , 0.13],
[0. , 0. , 0.44, 0. , 0.75, 0. , 0. ],
[0.61, 0. , 0. , 0.27, 0. , 0. , 0. ],
[0. , 0. , 0. , 0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0. , 0. , 0.28, 0. ],
[0. , 0. , 0. , 0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0. , 0. , 0. , 0. ]])
The spectrum of the graph:
# Eigenvalues of the adjacency matrix of `gr`.
nx.adjacency_spectrum(gr)
array([-0.18210492+0.31541497j, -0.18210492-0.31541497j,
0.36420984+0.j , 0. +0.j ,
0. +0.j , 0. +0.j ,
0. +0.j ])
and the Laplacian of this graph:
# Laplacian of the undirected copy of `gr`, converted to dense form for display.
nx.laplacian_matrix(gr.to_undirected()).todense()
matrix([[ 1.52, -0.41, -0.47, 0. , 0. , 0. , -0.64],
[-0.41, 1.22, -0.28, 0. , -0.53, 0. , 0. ],
[-0.47, -0.28, 1.4 , -0.65, 0. , 0. , 0. ],
[ 0. , 0. , -0.65, 0.65, 0. , 0. , 0. ],
[ 0. , -0.53, 0. , 0. , 0.8 , -0.27, 0. ],
[ 0. , 0. , 0. , 0. , -0.27, 0.27, 0. ],
[-0.64, 0. , 0. , 0. , 0. , 0. , 0.64]])
# Matrix built from the `weight` edge attribute, returned with the node ordering.
nx.attr_matrix(gr, edge_attr="weight")
(matrix([[0. , 0.18, 0. , 0. , 0. , 0. , 0.13],
[0. , 0. , 0.44, 0. , 0.75, 0. , 0. ],
[0.61, 0. , 0. , 0.27, 0. , 0. , 0. ],
[0. , 0. , 0. , 0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0. , 0. , 0.28, 0. ],
[0. , 0. , 0. , 0. , 0. , 0. , 0. ],
[0. , 0. , 0. , 0. , 0. , 0. , 0. ]]),
[1, 2, 3, 4, 5, 6, 7])
It’s common to use a centrality measure for the size of nodes:
# Scale each node's betweenness centrality into a drawable node size.
cent = nx.centrality.betweenness_centrality(gr)
nx.draw_networkx(gr, node_size=[v * 1500 for v in cent.values()], edge_color='silver')
The weakly connected components can be found by treating the graph as undirected:
# Work on one undirected copy rather than converting the graph twice.
undirected = gr.to_undirected()
nx.is_connected(undirected)
comps = nx.components.connected_components(undirected)
for c in comps:
    print(c)
{1, 2, 3, 4, 5, 6, 7}
def show_clique(graph, k=4):
    """
    Draw the graph, highlighting the first maximal clique of the specified size.

    Args:
        graph: The NetworkX graph to search and draw.
        k: Exact number of nodes the clique must have (default 4).

    Returns:
        The subgraph of ``graph`` induced by the clique's nodes, or an empty
        ``nx.Graph`` when no clique of size ``k`` is found.
    """
    # find_cliques yields cliques lazily, so stop at the first one of size k
    # instead of materialising all of them.
    clique = next(
        (clq for clq in nx.algorithms.find_cliques(graph) if len(clq) == k),
        None,
    )
    if clique is None:
        print(f"No clique of size {k}.")
        return nx.Graph()

    print(clique)
    members = set(clique)  # O(1) membership while colouring the nodes
    cols = ["red" if node in members else "white" for node in graph.nodes()]
    nx.draw_networkx(graph, with_labels=True, node_color=cols, edge_color="silver")
    return nx.subgraph(graph, clique)
# A clique of size 5 must be isomorphic to the complete graph on 5 nodes.
ba = nx.barabasi_albert_graph(50, 5)
subg = show_clique(ba, 5)
nx.is_isomorphic(subg, nx.complete_graph(5))
[1, 7, 8, 5, 9]
True
Set data
Often one wishes the nodes and edges to carry a payload:
# Attach a nested payload dictionary to node 12 and read it back.
G = nx.Graph()
G.add_node(12)
nx.set_node_attributes(G, {12: {'payload': {'id': 44, 'name': 'Swa'}}})
print(G.nodes[12]['payload'])
print(G.nodes[12]['payload']['name'])
{'id': 44, 'name': 'Swa'}
Swa
Pandas
The interplay between Pandas and NetworkX is also often crucial in an analysis:
g = nx.barabasi_albert_graph(50, 5)
# set a random weight on the edges
for e in g.edges:
    nx.set_edge_attributes(g, {e: {'weight': faker.random.random()}})
# give every node a fake first and last name
for n in g.nodes:
    nx.set_node_attributes(g, {n: {"feature": {"firstName": faker.first_name(), "lastName": faker.last_name()}}})
# edge list as a DataFrame, one row per edge
nx.to_pandas_edgelist(g)
| source | target | weight |
---|---|---|---|
0 | 0 | 5 | 0.922816 |
1 | 0 | 6 | 0.628166 |
2 | 0 | 9 | 0.947202 |
3 | 0 | 11 | 0.696823 |
4 | 0 | 20 | 0.998949 |
... | ... | ... | ... |
220 | 36 | 41 | 0.347691 |
221 | 36 | 47 | 0.417671 |
222 | 38 | 46 | 0.675554 |
223 | 38 | 49 | 0.161978 |
224 | 41 | 46 | 0.775454 |
225 rows × 3 columns
import pandas as pd
import copy

# easy access to the node features, keyed by node id
node_dic = {node_id: g.nodes[node_id]["feature"] for node_id in g.nodes}
rows = []  # the array we'll give to Pandas
for e in g.edges:
    # Copy so the per-edge columns do not leak into the shared node features.
    row = copy.copy(node_dic[e[0]])
    row["sourceId"] = e[0]
    row["targetId"] = e[1]
    row["weight"] = g.edges[e]["weight"]
    rows.append(row)
df = pd.DataFrame(rows)
df
| firstName | lastName | sourceId | targetId | weight |
---|---|---|---|---|---|
0 | Julie | Aguirre | 0 | 5 | 0.922816 |
1 | Julie | Aguirre | 0 | 6 | 0.628166 |
2 | Julie | Aguirre | 0 | 9 | 0.947202 |
3 | Julie | Aguirre | 0 | 11 | 0.696823 |
4 | Julie | Aguirre | 0 | 20 | 0.998949 |
... | ... | ... | ... | ... | ... |
220 | William | Garcia | 36 | 41 | 0.347691 |
221 | William | Garcia | 36 | 47 | 0.417671 |
222 | Jane | Gray | 38 | 46 | 0.675554 |
223 | Jane | Gray | 38 | 49 | 0.161978 |
224 | Eric | Morgan | 41 | 46 | 0.775454 |
225 rows × 5 columns
Visualization
NetworkX on its own is rather poor at visualizing graphs. Its strength lies in the graph algorithms, not in the display of graphs. If you want to have pretty pictures there are some options:
- within Jupyter you can use iPyCytoscape
- VisJs also has a Jupyter PyVis widget
- for more advanced graph layout you should consider the yFiles graph widget
- you can export NetworkX graphs to GML, GraphML and other formats which can be imported in Gephi, yFiles Online and yEd. See our article on Cora for more details.
The last option gives the best results but it means your data is disconnected from the main flow of your Jupyter notebook.