Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Port JavaScript async and igraph code to R?

I'm struggling to port some JavaScript code (which includes async and graph functionality) to R. Help please!

Here's what I'm trying to port:

import jsonpFetch from "./jsonpFetch";
import bus from '../bus';

/**
 * This function builds a graph from google's auto-suggestions.
 */
export default function buildGraph(entryWord, pattern, MAX_DEPTH, progress) {
  entryWord = entryWord && entryWord.trim();
  if (!entryWord) return;

  entryWord = entryWord.toLocaleLowerCase();

  const insertPosition = pattern.indexOf('...');
  if (insertPosition < 0) {
    throw new Error('Query pattern is missing "..."');
  }
  const queryPosition = pattern.indexOf('[query]');
  if (queryPosition < 0) {
    throw new Error('Query pattern is missing "[query]" keyword');
  }

  if (insertPosition < queryPosition) {
    throw new Error('[query] should come before ...');
  }

  let cancelled = false;
  let pendingResponse;
  let graph = require('ngraph.graph')();
  graph.maxDepth = MAX_DEPTH;
  let queue = [];
  let requestDelay = 300 + Math.random() * 100;
  progress.startDownload();

  startQueryConstruction();

  return {
    dispose,
    graph
  }

  function dispose() {
    cancelled = true;
    if (pendingResponse) {
      pendingResponse.cancel();
      pendingResponse = null;
    }
  }

  function startQueryConstruction() {
    graph.addNode(entryWord, {depth: 0});
    fetchNext(entryWord);
  }

  function loadSiblings(parent, results) {
    let q = fullQuery(parent).toLocaleLowerCase();
    var parentNode = graph.getNode(parent);

    if (!parentNode) {
      throw new Error('Parent is missing for ' + parent);
    }

    results.filter(x => x.toLocaleLowerCase().indexOf(q) === 0)
      .map(x => x.substring(q.length))
      .forEach(other => {
        const hasOtherNode = graph.hasNode(other);
        const hasOtherLink = graph.getLink(other, parent) || graph.getLink(parent, other);
        if (hasOtherNode) {
          if (!hasOtherLink) {
            graph.addLink(parent, other);
          }
          return;
        }

        let depth = parentNode.data.depth + 1;
        graph.addNode(other, {depth});
        graph.addLink(parent, other);
        if (depth < MAX_DEPTH) queue.push(other);
      });

    setTimeout(loadNext, requestDelay);
  }

  function loadNext() {
    if (cancelled) return;
    if (queue.length === 0) {
      bus.fire('graph-ready', graph);
      return;
    }

    let nextWord = queue.shift();
    fetchNext(nextWord);
    progress.updateLayout(queue.length, nextWord);
  }

  function fetchNext(query) {
    pendingResponse = getResponse(fullQuery(query));
    pendingResponse
      .then(res => onPendingReady(res, query))
      .catch((msg) => {
        const err = 'Failed to download ' + query + '; Message: ' + msg;
        console.error(err);
        progress.downloadError(err)
        loadNext();
      });
  }

  function onPendingReady(res, query) {
    if (res.length >= 2) {
      loadSiblings(query, res[1]);
    } else {
      console.error(res);
      throw new Error('Unexpected response');
    }
  }

  function fullQuery(query) {
    return pattern.replace('[query]', query).replace('...', '');
  }

  function getResponse(query) {
    return jsonpFetch('//suggestqueries.google.com/complete/search?client=firefox&q=' + encodeURIComponent(query));
  }
}

And this is what I have so far in R:

# This function builds a graph from Google's Auto-Suggestions

buildGraph <- function(entryWord, pattern) {

  graph <- igraph::make_empty_graph() # setup empty graph

  entryWord <- trimws(entryWord) #remove leading/trailing whitespace
  entryWord <- tolower(entryWord) # lowercase technology name

  requestDelay <- 0.3 + runif(1, 0, 1) * 0.1 # 300 milliseconds (0.3 seconds) + some number between 0 and 1 * 100 milliseconds (0.1 seconds)

  startQueryConstruction()

  dispose <- function() {
    cancelled <- TRUE
    if (pendingResponse) {
      # pendingResponse.cancel();
      # pendingResponse = null;
    }
  }

  startQueryConstruction <- function() {
    graph %>% igraph::add.vertices(entryWord)
    fetchNext(entryWord)
  }

  loadSiblings <- function(parent, results) {
    q = tolower(fullQuery(parent))
    parentNode <- igraph::vertex_attr(graph, parent)

    if (!parentNode) {
      # throw new Error('Parent is missing for ' + parent);
      stderr(paste0('Parent is missing for ', parent))
    }

    # results.filter(x => x.toLocaleLowerCase().indexOf(q) === 0)
  #     .map(x => x.substring(q.length))
  #     .forEach(other => {
  #       const hasOtherNode = graph.hasNode(other);
  #       const hasOtherLink = graph.getLink(other, parent) || graph.getLink(parent, other);
  #       if (hasOtherNode) {
  #         if (!hasOtherLink) {
  #           graph.addLink(parent, other);
  #         }
  #         return;
  #       }
  #       
  #       let depth = parentNode.data.depth + 1;
  #       graph.addNode(other, {depth});
  #       graph.addLink(parent, other);
  #       if (depth < MAX_DEPTH) queue.push(other);
  #       });
  #     
  #     setTimeout(loadNext, requestDelay);
  # }

  loadNext <- function() {
    # if (cancelled) return;
    if (length(queue) == 0) {
      # bus.fire('graph-ready', graph)
      # return;
    }

    nextWord <- queue.shift() # what is queue.shift in R?????
    fetchNext(nextWord)
    # progress.updateLayout(queue.length, nextWord) -- I think this is only for Vue UI
  }

  fetchNext <- function(query) {
    pendingResponse = getResponse(query)
    pendingResponse %...>%
       res = onPendingReady(res, query) %...!%
        (function(error) {
          print(paste("Failed to download: ", query, "; Message: ", error$message))
          loadNext()
        })
  }

  onPendingReady <- function(res, query) {
    if (length(res) >= 2) {
      loadSiblings(query, res[1])
    } else {
      # catch and print error
      # console.error(res)
      # throw error
      # throw new Error('Unexpected response');
    }
  }

  fullQuery <- function(query) {
    # return pattern.replace('[query]', query).replace('...', '')
  }

  getResponse <- function(query) {
    json_response <- future::future(jsonlite::fromJSON('//suggestqueries.google.com/complete/search?client=firefox&q=' + encodeURIComponent(query)))
    return(json_response)
  }


}

Please note that I've included some commented out some lines of JavaScript code where I'm not sure what the R equivalent is. Most of the murky code for me is focused on how to do stuff in igraph and how to do stuff asynchronously in R (using promises and/or futures).

Attribution: https://github.com/anvaka/vs/blob/master/src/lib/buildGraph.js

Thanks in advance!

like image 719
Abe Avatar asked Oct 25 '19 03:10

Abe


1 Answers

I've been playing around with igraph and APIs recently so this is fairly fresh. I think the code below does what you want but it does leave out some complexities (such as not timing out the API). It isn't terribly quick - I suspect a lot of that is to do with use of the as_data_frame interface to keep track of vertices.

So I'm sure it could be optimized and I'm certain that at some point the API will return something in an encoding that breaks it, but it's a start.

library(igraph)

api_fetch <- function(query){
    result <- jsonlite::fromJSON(paste0('http://suggestqueries.google.com/complete/search?client=firefox&q=', httpuv::encodeURIComponent(query)))
    return(result)
}


build_query_graph <- function(entry_word, max_depth=2){

    # Create an empty graph
    graph <- make_empty_graph()
    entry_word <- tolower(trimws(entry_word))
    graph <- add_vertices(graph, 1, name=entry_word, searched=FALSE)

    # Keep on doing this until the graph hits the maximum depth from the entry word
    while(TRUE){

        # Look up the current vertices and find their depths from the entry word
        vertices <- as_data_frame(graph, what='vertices')
        vertex_depth <- distances(graph, v=entry_word)
        vertices$depth <- vertex_depth[match(colnames(vertex_depth), vertices$name)]

        # Find vertices at least one step from the maximum depth and that haven't 
        # already been searched and sort to get the shallowest at the top
        live_vertices <- subset(vertices, depth <= (max_depth - 1) & ! searched)
        live_vertices <- live_vertices[order(live_vertices$depth),]

        # If there are any vertices meeting these criteria, then query the API
        # otherwise bail from the while loop
        if(nrow(live_vertices)){

            # Get the vertex name and query it
            this_vertex <- live_vertices$name[1]
            res <- api_fetch(this_vertex)

            # For each of the daughter results, check it isn't already a vertex
            # and add an edge from it to this_vertex
            for(daughter in res[[2]]){

                if(! daughter %in% get.vertex.attribute(graph, 'name')){
                    graph <- add_vertices(graph, 1, name=daughter, searched=FALSE)
                }

                graph <- add_edges(graph, c(this_vertex, daughter))
            }

            # Don't search this vertex again
            graph <- set_vertex_attr(graph, 'searched', this_vertex, TRUE)

        } else {
            break
        }       
    }
    return(graph)
}

Running that:

> g <- build_query_graph('amazon')
> g
IGRAPH 0ec19b6 DN-- 90 100 -- 
+ attr: name (v/c), searched (v/l)
+ edges from 0ec19b6 (vertex names):
 [1] amazon            ->amazon                        amazon            ->amazon prime                  amazon            ->amazon prime video           
 [4] amazon            ->amazon uk                     amazon            ->amazon music                  amazon            ->amazon smile                 
 [7] amazon            ->amazon india                  amazon            ->amazon jobs                   amazon            ->amazon video                 
[10] amazon            ->amazon customer service       amazon prime      ->amazon prime                  amazon prime      ->amazon prime video           
[13] amazon prime      ->amazon prime movies           amazon prime      ->amazon prime music            amazon prime      ->amazon prime now             
[16] amazon prime      ->amazon prime login            amazon prime      ->amazon prime uk               amazon prime      ->amazon prime tv              
[19] amazon prime      ->amazon prime cost             amazon prime      ->amazon prime student          amazon prime video->amazon prime video           
[22] amazon prime video->amazon prime video login      amazon prime video->amazon prime video app        amazon prime video->amazon prime video uk        
+ ... omitted several edges
> plot(g)

output of command

EDIT: Thinking about it, that recalculates all the distances repeatedly and does a lot of sorting and matching. It is probably faster to save the depth of individual vertices as they are created:

build_query_graph <- function(entry_word, max_depth=2){

    # Create an empty graph
    graph <- make_empty_graph()
    entry_word <- tolower(trimws(entry_word))
    graph <- add_vertices(graph, 1, name=entry_word, depth=0, searched=FALSE)

    # Keep on doing this until the graph hits the maximum depth from the entry word
    while(TRUE){

        # Look up the current vertices and find their depths from the entry word
        vertices <- as_data_frame(graph, what='vertices')

        # Find vertices at least one step from the maximum depth and that haven't 
        # already been searched and sort to get the shallowest at the top
        live_vertices <- subset(vertices, depth <= (max_depth - 1) & ! searched)
        live_vertices <- live_vertices[order(live_vertices$depth),]

        # If there are any vertices meeting these criteria, then query the API
        # otherwise bail from the while loop
        if(nrow(live_vertices)){

            # Get the vertex name and query it
            this_vertex <- live_vertices$name[1]
            res <- api_fetch(this_vertex)

            # For each of the daughter results, check it isn't already a vertex
            # add an edge from it to this_vertex and store the depth from the entry word
            for(daughter in res[[2]]){

                if(! daughter %in% get.vertex.attribute(graph, 'name')){
                    graph <- add_vertices(graph, 1, name=daughter, depth=NA, searched=FALSE)
                }

                graph <- add_edges(graph, c(this_vertex, daughter))
                graph <- set_vertex_attr(graph, 'depth', daughter,
                                         distances(graph, v=entry_word, to=daughter))
            }

            # Don't search this vertex again
            graph <- set_vertex_attr(graph, 'searched', this_vertex, TRUE)

        } else {
            break
        }       
    }

    return(graph)
}
like image 155
David_O Avatar answered Nov 06 '22 10:11

David_O