Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to count and order by number of incoming edges

Tags:

graph

arangodb

I am trying to get the ten people who have starred in the most movies, returning each person's name and number of movies in descending order. I am using the IMDB dataset.

This is what I have so far:

arangosh [_system]>   var Graph = require("org/arangodb/graph").Graph;
arangosh [_system]>   var db = require("org/arangodb").db;
arangosh [_system]> statement = db._createStatement({query: 'FOR vert IN imdb_vertices FILTER vert.type == "Person" LET edge_count = (LENGTH(EDGES(imdb_edges, vert, "outbound", [{type: "ACTS_IN"}]))) RETURN {"name": vert.name,  "count": edge_count}'})
[object ArangoStatement]

arangosh [_system]> cursor = statement.execute()
[object ArangoQueryCursor]

arangosh [_system]> cursor.next()
{ 
  "name" : "Stephanie Faracy", 
  "count" : 0 
}

How can I do that with AQL?

like image 970
mikewilliamson Avatar asked May 01 '14 01:05

mikewilliamson


1 Answer

Without an index: The following will return the Top-10:

arangosh [_system]> db._createStatement({query: 'FOR vert IN imdb_vertices FILTER vert.type == "Person" LET edge_count = (LENGTH(EDGES(imdb_edges, vert, "outbound", [{"type": "Role", "$label": "ACTS_IN"}]))) SORT edge_count DESC LIMIT 10 RETURN {"name": vert.name,  "count": edge_count}'}).execute().toArray()
[ 
  { 
    "name" : "Clint Eastwood", 
    "count" : 148 
  }, 
  { 
    "name" : "Claude Jade", 
    "count" : 142 
  }, 
  { 
    "name" : "Samuel L. Jackson", 
    "count" : 122 
  }, 
  { 
    "name" : "Armin Mueller-Stahl", 
    "count" : 112 
  }, 
  { 
    "name" : "Gérard Depardieu", 
    "count" : 104 
  }, 
  { 
    "name" : "Marisa Mell", 
    "count" : 104 
  }, 
  { 
    "name" : "Robert De Niro", 
    "count" : 104 
  }, 
  { 
    "name" : "Bruce Willis", 
    "count" : 96 
  }, 
  { 
    "name" : "Jackie Chan", 
    "count" : 94 
  }, 
  { 
    "name" : "Michael Caine", 
    "count" : 90 
  } 
]

Basically, you can also use SORT on variables created with LET, and LIMIT lets you restrict the result to the top 10. Note that in the edge example passed to EDGES() the type is 'Role' and the $label is 'ACTS_IN' (not type 'ACTS_IN' as in the query from the question).

It would be more efficient to store the count in the documents and use a sorted index, but this would require updating the documents:

arangosh [_system]> c = db._createStatement({query: 'FOR vert IN imdb_vertices FILTER vert.type == "Person" LET edge_count = (LENGTH(EDGES(imdb_edges, vert, "outbound", [{"type": "Role", "$label": "ACTS_IN"}]))) RETURN {"_key": vert._key,  "count": edge_count}'}).execute()
[object ArangoQueryCursor]

arangosh [_system]> while (c.hasNext()) { var d = c.next(); db.imdb_vertices.update(d._key, {COUNT: d.count}); }

arangosh [_system]> db.imdb_vertices.ensureSkiplist("COUNT");

arangosh [_system]> x = db._createStatement({query: 'FOR vert in imdb_vertices FILTER vert.COUNT >= 0 SORT vert.COUNT DESC LIMIT 10 RETURN vert'}).execute()
[object ArangoQueryCursor]
like image 119
fceller Avatar answered Nov 06 '22 09:11

fceller