Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Plotting similarity matrix using Networkx

I am trying to visualize correlations (similarity scores of up to 1) between words using networkx.

For example similarity scores between dog, cat, animal, person, wolf

I've tried using this code to plot the similarity distances between each word/node:

import networkx as nx
import matplotlib.pyplot as plt
# Module-level networkx graph that build_graph() and build_graph_for_all() add nodes and edges to.
G=nx.Graph()

# Pairwise similarity scores, one row per pair: [word_a, word_b, score].
# Rows pairing a word with itself carry the maximum score of 1.0.
corr_data = [
    ['Dog', 'Dog', 1.0],
    ['Cat', 'Dog', 0.8016854524612427],
    ['Wolf', 'Dog', 0.5206573009490967],
    ['Person', 'Dog', 0.3756750822067261],
    ['Animal', 'Dog', 0.6618534326553345],
    ['Cat', 'Cat', 1.0],
    ['Wolf', 'Cat', 0.5081626176834106],
    ['Person', 'Cat', 0.32475101947784424],
    ['Animal', 'Cat', 0.6260400414466858],
    ['Wolf', 'Wolf', 1.0],
    ['Person', 'Wolf', 0.23091702163219452],
    ['Animal', 'Wolf', 0.5261368751525879],
    ['Person', 'Person', 1.0],
    ['Animal', 'Person', 0.34220656752586365],
    ['Animal', 'Animal', 1.0],
]

# Directed "source_target" keys of the edges build_graph() has already added.
existing_edges = {}


def build_graph(w, lev):
    """Recursively add to ``G`` every edge reachable from the word ``w``.

    Each row of ``corr_data`` that mentions ``w`` (in either position)
    contributes an edge between ``w`` and the row's other word, and the walk
    then continues from that other word.

    Args:
        w: Word to expand; compared against the first two fields of each row.
        lev: Current recursion depth. Expansion stops once it exceeds 5.
    """
    if lev > 5:
        return
    for z in corr_data:
        # Pick the word on the opposite side of the row from ``w``.
        if z[0] == w:
            other = z[1]
        elif z[1] == w:
            other = z[0]
        else:
            continue

        edge_key = str(w) + "_" + str(other)
        if edge_key in existing_edges:
            # Already expanded in this direction; prevents endless ping-pong.
            continue
        existing_edges[edge_key] = 1
        # add_edge() creates either endpoint if it is not in the graph yet.
        G.add_edge(w, str(other))
        build_graph(other, lev + 1)


# Not referenced by the code below; kept so the module-level name stays defined.
existing_nodes = {}


def build_graph_for_all(max_rows=41):
    """Add one edge to ``G`` for each row of ``corr_data``.

    The edge connects the row's two words (converted with ``str``). The
    similarity score in the third field is not stored, so every edge is
    unweighted.

    Args:
        max_rows: Maximum number of leading rows to process. The default of
            41 matches the previous hard-coded cap; pass ``None`` to process
            every row.
    """
    for d in corr_data[:max_rows]:
        # add_edge() creates missing endpoints, so no separate add_node() calls
        # (or "have we seen this node" bookkeeping) are needed.
        G.add_edge(str(d[0]), str(d[1]))


build_graph_for_all()

print (G.nodes(data=True))
nx.draw(G, width=2, with_labels=True)
# Save before show(): once an interactive window is closed the figure is gone,
# and calling show() before draw() displays nothing.
plt.savefig("path1.png")
plt.show()


w="design"
G.add_node(w)
build_graph(w, 0)

print (G.nodes(data=True))
# Start a fresh figure so this drawing is not painted over the previous one.
plt.figure()
nx.draw(G, width=2, with_labels=True)
plt.savefig("path.png")
plt.show()

The distances between my nodes look off, e.g. Cat and Person are plotted closer together than Cat and Dog. Am I missing something obvious here?

like image 250
BadKarma1122 Avatar asked Mar 01 '26 11:03

BadKarma1122


1 Answer

There are a few things you'll need to fix. Most importantly, you can read up on how networkx draws networks using the spring layout. Basically what you need to do is to add the correlation values to your network edges.

  1. You can do this by replacing the G.add_edge line in build_graph_for_all with:

     G.add_weighted_edges_from([[str(d[0]), str(d[1]),d[2]]])     
    
  2. You can plot your network using the following code, which should take the edge weights into account.

    pos = nx.spring_layout(G,weight='weight')
    nx.draw(G,pos=pos, width=2, with_labels=True)
    
  3. If you really want to emphasize the difference between high- and low-correlation edges, you can transform the weights as follows:

    corr_data = [[x[0],x[1],1000**(x[2])] for x in corr_data]
    

This will stretch out your weights - a low correlation of .3 will be mapped to around 1000^0.3 ≈ 8, and a perfect correlation of 1 will be mapped to 1000. Remaking the graph with these new weights and repeating the plotting code above yields the following image:

result of the plot

Here is the full working code.

import networkx as nx
import matplotlib.pyplot as plt

# Module-level networkx graph that build_graph() and build_graph_for_all() add nodes and edges to.
G=nx.Graph()

# Pairwise similarity scores, one row per pair: [word_a, word_b, score].
corr_data = [
    ['Dog', 'Dog', 1.0],
    ['Cat', 'Dog', 0.8016854524612427],
    ['Wolf', 'Dog', 0.5206573009490967],
    ['Person', 'Dog', 0.3756750822067261],
    ['Animal', 'Dog', 0.6618534326553345],
    ['Cat', 'Cat', 1.0],
    ['Wolf', 'Cat', 0.5081626176834106],
    ['Person', 'Cat', 0.32475101947784424],
    ['Animal', 'Cat', 0.6260400414466858],
    ['Wolf', 'Wolf', 1.0],
    ['Person', 'Wolf', 0.23091702163219452],
    ['Animal', 'Wolf', 0.5261368751525879],
    ['Person', 'Person', 1.0],
    ['Animal', 'Person', 0.34220656752586365],
    ['Animal', 'Animal', 1.0],
]

# Replace each score s with 1000**s so that strong and weak similarities are
# spread much further apart (a score of 1.0 becomes 1000.0).
corr_data = [
    [first, second, 1000 ** (score)]
    for first, second, score in corr_data
]

# Directed "source_target" keys of the edges build_graph() has already added.
existing_edges = {}


def build_graph(w, lev):
    """Recursively add to ``G`` every weighted edge reachable from ``w``.

    Each row of ``corr_data`` that mentions ``w`` (in either position)
    contributes an edge between ``w`` and the row's other word, carrying the
    row's third field as its ``weight``. The walk then continues from that
    other word.

    Args:
        w: Word to expand; compared against the first two fields of each row.
        lev: Current recursion depth. Expansion stops once it exceeds 5.
    """
    if lev > 5:
        return
    for z in corr_data:
        # Pick the word on the opposite side of the row from ``w``.
        if z[0] == w:
            other = z[1]
        elif z[1] == w:
            other = z[0]
        else:
            continue

        edge_key = str(w) + "_" + str(other)
        if edge_key in existing_edges:
            # Already expanded in this direction; prevents endless ping-pong.
            continue
        existing_edges[edge_key] = 1
        # add_edge() creates either endpoint if it is not in the graph yet.
        # The weight matches what build_graph_for_all() stores on its edges.
        G.add_edge(w, str(other), weight=z[2])
        build_graph(other, lev + 1)

# Not referenced by the code below; kept so the module-level name stays defined.
existing_nodes = {}


def build_graph_for_all(max_rows=41):
    """Add one weighted edge to ``G`` for each row of ``corr_data``.

    The edge connects the row's two words (converted with ``str``) and stores
    the row's third field under the ``weight`` edge attribute.

    Args:
        max_rows: Maximum number of leading rows to process. The default of
            41 matches the previous hard-coded cap; pass ``None`` to process
            every row.
    """
    for d in corr_data[:max_rows]:
        # add_edge() creates missing endpoints, so no separate add_node() calls
        # (or "have we seen this node" bookkeeping) are needed.
        G.add_edge(str(d[0]), str(d[1]), weight=d[2])

# Populate G from corr_data.
build_graph_for_all()

# Compute node positions with the spring layout, reading the 'weight' edge
# attribute, then draw the graph at those positions.
layout = nx.spring_layout(G, weight='weight')
nx.draw(G, pos=layout, width=2, with_labels=True)

# Write the current figure to disk.
plt.savefig("path1.png")
like image 176
Johannes Wachs Avatar answered Mar 04 '26 00:03

Johannes Wachs



Donate For Us

If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!