I'm struggling to port some JavaScript code (which includes async and graph functionality) to R. Help please!
Here's what I'm trying to port:
import jsonpFetch from "./jsonpFetch";
import bus from '../bus';
/**
* This function builds a graph from google's auto-suggestions.
*/
export default function buildGraph(entryWord, pattern, MAX_DEPTH, progress) {
entryWord = entryWord && entryWord.trim();
if (!entryWord) return;
entryWord = entryWord.toLocaleLowerCase();
const insertPosition = pattern.indexOf('...');
if (insertPosition < 0) {
throw new Error('Query pattern is missing "..."');
}
const queryPosition = pattern.indexOf('[query]');
if (queryPosition < 0) {
throw new Error('Query pattern is missing "[query]" keyword');
}
if (insertPosition < queryPosition) {
throw new Error('[query] should come before ...');
}
let cancelled = false;
let pendingResponse;
let graph = require('ngraph.graph')();
graph.maxDepth = MAX_DEPTH;
let queue = [];
let requestDelay = 300 + Math.random() * 100;
progress.startDownload();
startQueryConstruction();
return {
dispose,
graph
}
function dispose() {
cancelled = true;
if (pendingResponse) {
pendingResponse.cancel();
pendingResponse = null;
}
}
function startQueryConstruction() {
graph.addNode(entryWord, {depth: 0});
fetchNext(entryWord);
}
function loadSiblings(parent, results) {
let q = fullQuery(parent).toLocaleLowerCase();
var parentNode = graph.getNode(parent);
if (!parentNode) {
throw new Error('Parent is missing for ' + parent);
}
results.filter(x => x.toLocaleLowerCase().indexOf(q) === 0)
.map(x => x.substring(q.length))
.forEach(other => {
const hasOtherNode = graph.hasNode(other);
const hasOtherLink = graph.getLink(other, parent) || graph.getLink(parent, other);
if (hasOtherNode) {
if (!hasOtherLink) {
graph.addLink(parent, other);
}
return;
}
let depth = parentNode.data.depth + 1;
graph.addNode(other, {depth});
graph.addLink(parent, other);
if (depth < MAX_DEPTH) queue.push(other);
});
setTimeout(loadNext, requestDelay);
}
function loadNext() {
if (cancelled) return;
if (queue.length === 0) {
bus.fire('graph-ready', graph);
return;
}
let nextWord = queue.shift();
fetchNext(nextWord);
progress.updateLayout(queue.length, nextWord);
}
function fetchNext(query) {
pendingResponse = getResponse(fullQuery(query));
pendingResponse
.then(res => onPendingReady(res, query))
.catch((msg) => {
const err = 'Failed to download ' + query + '; Message: ' + msg;
console.error(err);
progress.downloadError(err)
loadNext();
});
}
function onPendingReady(res, query) {
if (res.length >= 2) {
loadSiblings(query, res[1]);
} else {
console.error(res);
throw new Error('Unexpected response');
}
}
function fullQuery(query) {
return pattern.replace('[query]', query).replace('...', '');
}
function getResponse(query) {
return jsonpFetch('//suggestqueries.google.com/complete/search?client=firefox&q=' + encodeURIComponent(query));
}
}
And this is what I have so far in R:
# This function builds a graph from Google's Auto-Suggestions
buildGraph <- function(entryWord, pattern) {
graph <- igraph::make_empty_graph() # setup empty graph
entryWord <- trimws(entryWord) #remove leading/trailing whitespace
entryWord <- tolower(entryWord) # lowercase technology name
requestDelay <- 0.3 + runif(1, 0, 1) * 0.1 # 300 milliseconds (0.3 seconds) + some number between 0 and 1 * 100 milliseconds (0.1 seconds)
startQueryConstruction()
dispose <- function() {
cancelled <- TRUE
if (pendingResponse) {
# pendingResponse.cancel();
# pendingResponse = null;
}
}
startQueryConstruction <- function() {
graph %>% igraph::add.vertices(entryWord)
fetchNext(entryWord)
}
loadSiblings <- function(parent, results) {
q = tolower(fullQuery(parent))
parentNode <- igraph::vertex_attr(graph, parent)
if (!parentNode) {
# throw new Error('Parent is missing for ' + parent);
stderr(paste0('Parent is missing for ', parent))
}
# results.filter(x => x.toLocaleLowerCase().indexOf(q) === 0)
# .map(x => x.substring(q.length))
# .forEach(other => {
# const hasOtherNode = graph.hasNode(other);
# const hasOtherLink = graph.getLink(other, parent) || graph.getLink(parent, other);
# if (hasOtherNode) {
# if (!hasOtherLink) {
# graph.addLink(parent, other);
# }
# return;
# }
#
# let depth = parentNode.data.depth + 1;
# graph.addNode(other, {depth});
# graph.addLink(parent, other);
# if (depth < MAX_DEPTH) queue.push(other);
# });
#
# setTimeout(loadNext, requestDelay);
# }
loadNext <- function() {
# if (cancelled) return;
if (length(queue) == 0) {
# bus.fire('graph-ready', graph)
# return;
}
nextWord <- queue.shift() # what is queue.shift in R?????
fetchNext(nextWord)
# progress.updateLayout(queue.length, nextWord) -- I think this is only for Vue UI
}
fetchNext <- function(query) {
pendingResponse = getResponse(query)
pendingResponse %...>%
res = onPendingReady(res, query) %...!%
(function(error) {
print(paste("Failed to download: ", query, "; Message: ", error$message))
loadNext()
})
}
onPendingReady <- function(res, query) {
if (length(res) >= 2) {
loadSiblings(query, res[1])
} else {
# catch and print error
# console.error(res)
# throw error
# throw new Error('Unexpected response');
}
}
fullQuery <- function(query) {
# return pattern.replace('[query]', query).replace('...', '')
}
getResponse <- function(query) {
json_response <- future::future(jsonlite::fromJSON('//suggestqueries.google.com/complete/search?client=firefox&q=' + encodeURIComponent(query)))
return(json_response)
}
}
Please note that I've included some commented out some lines of JavaScript code where I'm not sure what the R equivalent is. Most of the murky code for me is focused on how to do stuff in igraph
and how to do stuff asynchronously in R (using promises
and/or futures
).
Attribution: https://github.com/anvaka/vs/blob/master/src/lib/buildGraph.js
Thanks in advance!
I've been playing around with igraph and APIs recently so this is fairly fresh. I think the code below does what you want but it does leave out some complexities (such as not timing out the API). It isn't terribly quick - I suspect a lot of that is to do with use of the as_data_frame interface to keep track of vertices.
So I'm sure it could be optimized and I'm certain that at some point the API will return something in an encoding that breaks it, but it's a start.
library(igraph)
api_fetch <- function(query){
result <- jsonlite::fromJSON(paste0('http://suggestqueries.google.com/complete/search?client=firefox&q=', httpuv::encodeURIComponent(query)))
return(result)
}
build_query_graph <- function(entry_word, max_depth=2){
# Create an empty graph
graph <- make_empty_graph()
entry_word <- tolower(trimws(entry_word))
graph <- add_vertices(graph, 1, name=entry_word, searched=FALSE)
# Keep on doing this until the graph hits the maximum depth from the entry word
while(TRUE){
# Look up the current vertices and find their depths from the entry word
vertices <- as_data_frame(graph, what='vertices')
vertex_depth <- distances(graph, v=entry_word)
vertices$depth <- vertex_depth[match(colnames(vertex_depth), vertices$name)]
# Find vertices at least one step from the maximum depth and that haven't
# already been searched and sort to get the shallowest at the top
live_vertices <- subset(vertices, depth <= (max_depth - 1) & ! searched)
live_vertices <- live_vertices[order(live_vertices$depth),]
# If there are any vertices meeting these criteria, then query the API
# otherwise bail from the while loop
if(nrow(live_vertices)){
# Get the vertex name and query it
this_vertex <- live_vertices$name[1]
res <- api_fetch(this_vertex)
# For each of the daughter results, check it isn't already a vertex
# and add an edge from it to this_vertex
for(daughter in res[[2]]){
if(! daughter %in% get.vertex.attribute(graph, 'name')){
graph <- add_vertices(graph, 1, name=daughter, searched=FALSE)
}
graph <- add_edges(graph, c(this_vertex, daughter))
}
# Don't search this vertex again
graph <- set_vertex_attr(graph, 'searched', this_vertex, TRUE)
} else {
break
}
}
return(graph)
}
Running that:
> g <- build_query_graph('amazon')
> g
IGRAPH 0ec19b6 DN-- 90 100 --
+ attr: name (v/c), searched (v/l)
+ edges from 0ec19b6 (vertex names):
[1] amazon ->amazon amazon ->amazon prime amazon ->amazon prime video
[4] amazon ->amazon uk amazon ->amazon music amazon ->amazon smile
[7] amazon ->amazon india amazon ->amazon jobs amazon ->amazon video
[10] amazon ->amazon customer service amazon prime ->amazon prime amazon prime ->amazon prime video
[13] amazon prime ->amazon prime movies amazon prime ->amazon prime music amazon prime ->amazon prime now
[16] amazon prime ->amazon prime login amazon prime ->amazon prime uk amazon prime ->amazon prime tv
[19] amazon prime ->amazon prime cost amazon prime ->amazon prime student amazon prime video->amazon prime video
[22] amazon prime video->amazon prime video login amazon prime video->amazon prime video app amazon prime video->amazon prime video uk
+ ... omitted several edges
> plot(g)
EDIT: Thinking about it, that recalculates all the distances repeatedly and does a lot of sorting and matching. It is probably faster to save the depth of individual vertices as they are created:
build_query_graph <- function(entry_word, max_depth=2){
# Create an empty graph
graph <- make_empty_graph()
entry_word <- tolower(trimws(entry_word))
graph <- add_vertices(graph, 1, name=entry_word, depth=0, searched=FALSE)
# Keep on doing this until the graph hits the maximum depth from the entry word
while(TRUE){
# Look up the current vertices and find their depths from the entry word
vertices <- as_data_frame(graph, what='vertices')
# Find vertices at least one step from the maximum depth and that haven't
# already been searched and sort to get the shallowest at the top
live_vertices <- subset(vertices, depth <= (max_depth - 1) & ! searched)
live_vertices <- live_vertices[order(live_vertices$depth),]
# If there are any vertices meeting these criteria, then query the API
# otherwise bail from the while loop
if(nrow(live_vertices)){
# Get the vertex name and query it
this_vertex <- live_vertices$name[1]
res <- api_fetch(this_vertex)
# For each of the daughter results, check it isn't already a vertex
# add an edge from it to this_vertex and store the depth from the entry word
for(daughter in res[[2]]){
if(! daughter %in% get.vertex.attribute(graph, 'name')){
graph <- add_vertices(graph, 1, name=daughter, depth=NA, searched=FALSE)
}
graph <- add_edges(graph, c(this_vertex, daughter))
graph <- set_vertex_attr(graph, 'depth', daughter,
distances(graph, v=entry_word, to=daughter))
}
# Don't search this vertex again
graph <- set_vertex_attr(graph, 'searched', this_vertex, TRUE)
} else {
break
}
}
return(graph)
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With