Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to use ContinuationToken with recursive folder iterator

Because of Drive API quotas, Apps Script service quotas, and the 6-minute limit on script execution time, it is often critical to split Google Drive file manipulations into chunks.

We can use PropertiesService to store the continuationToken of a FolderIterator or FileIterator. This way we can stop our script and, on the next run, continue from the place where we stopped.

Working example (linear iterator)

  // Logs the name of every file in the user's Drive.
  // Listing a large Drive can outlast the maximum execution time, so the
  // position is saved in a continuation token and picked up on the next run.
  var userProperties = PropertiesService.getUserProperties();
  var continuationToken = userProperties.getProperty('CONTINUATION_TOKEN');
  var start = new Date();
  var end = new Date();
  var maxTime = 1000*60*4.5; // Max safe time, 4.5 mins

  // No saved token means this is the first execution: list from the start.
  // Otherwise resume the listing from where the previous run stopped.
  var files = (continuationToken == null)
    ? DriveApp.getFiles()
    : DriveApp.continueFileIterator(continuationToken);

  while (files.hasNext()) {
    // Stop early once the time budget is used up so the state can still be saved.
    if (end.getTime() - start.getTime() > maxTime) {
      break;
    }
    var file = files.next();
    Logger.log(file.getName());
    end = new Date();
  }

  if (!files.hasNext()) {
    // Everything was listed: delete the token so the next run starts over.
    PropertiesService.getUserProperties().deleteProperty('CONTINUATION_TOKEN');
  } else {
    // Files remain: save our place by storing the token in the user properties.
    continuationToken = files.getContinuationToken();
    userProperties.setProperty('CONTINUATION_TOKEN', continuationToken);
  }

Problem (recursive iterator)

To retrieve a tree-like folder structure and get its files, we have to use a recursive function. Something like this:

// Start the traversal at the root folder; 'root folder id' is a placeholder for a real folder ID.
doFolders(DriveApp.getFolderById('root folder id'));
/**
 * Recursively walks every folder below the given folder, depth-first.
 *
 * @param {Folder} parentFolder - folder whose subfolders are visited.
 */
function doFolders(parentFolder) {
  for (var subfolders = parentFolder.getFolders(); subfolders.hasNext();) {
    var subfolder = subfolders.next();
    // do something with folder
    // go subfolders
    doFolders(subfolder);
  }
}

However, in this case I have no idea how to use continuationToken.

Question

How can a ContinuationToken be used with a recursive folder iterator when we need to go through the whole folder structure?

Assumption

Does it make sense to construct many tokens, each named after the id of its parent folder?

like image 289
vatavale Avatar asked Mar 09 '23 08:03

vatavale


1 Answers

If you're trying to recursively iterate on a folder and want to use continuation tokens (as is probably required for large folders), you'll need a data structure that can store multiple sets of continuation tokens: one token for files and one for folders, for each folder in the current hierarchy.

The simplest data structure would be an array of objects.

Here is a solution that gives you the template for creating a function that can recursively process files and store continuation tokens so it can resume if it times out.

  1. Simply modify MAX_RUNNING_TIME_MS to your desired value (now it's set to 1 minute).

    You don't want to set it more than ~4.9 minutes as the script could timeout before then and not store its current state.

  2. Update the processFile method to do whatever you want on files.
  3. Finally, call processRootFolder() and pass it a Folder. It'll be smart enough to know how to resume processing the folder.

Sure there is room for improvement (e.g. it simply checks the folder name to see if it's a resume vs. a restart) but this will most likely be sufficient for 95% of people that need to iterate recursively on a folder with continuation tokens.

/**
 * Processes every file under rootFolder, one step at a time, and saves its
 * position in the document properties when the time budget runs out so that
 * a later call can resume.
 *
 * Saved state is reused only when its first entry has the same folder name as
 * rootFolder; otherwise (or when the saved state is missing, empty or
 * unreadable) a new session is started.
 *
 * @param {Folder} rootFolder - folder to process recursively.
 */
function processRootFolder(rootFolder) {
  var MAX_RUNNING_TIME_MS = 1 * 60 * 1000;
  var RECURSIVE_ITERATOR_KEY = "RECURSIVE_ITERATOR_KEY";

  var startTime = Date.now();
  var properties = PropertiesService.getDocumentProperties();

  // [{folderName: String, fileIteratorContinuationToken: String?, folderIteratorContinuationToken: String}]
  var recursiveIterator = null;
  try {
    recursiveIterator = JSON.parse(properties.getProperty(RECURSIVE_ITERATOR_KEY));
  } catch (e) {
    // Unreadable state would otherwise make every future run fail at this point.
    console.warn("Stored iterator state could not be parsed. Clearing out the old iterator.");
  }
  // An empty or malformed stack has no root entry to compare against, so treat it as "no saved state".
  if (!Array.isArray(recursiveIterator) || recursiveIterator.length === 0 || recursiveIterator[0] == null) {
    recursiveIterator = null;
  }
  if (recursiveIterator !== null) {
    // verify that it's actually for the same folder
    if (rootFolder.getName() !== recursiveIterator[0].folderName) {
      console.warn("Looks like this is a new folder. Clearing out the old iterator.");
      recursiveIterator = null;
    } else {
      console.info("Resuming session.");
    }
  }
  if (recursiveIterator === null) {
    console.info("Starting new session.");
    recursiveIterator = [makeIterationFromFolder(rootFolder)];
  }

  while (recursiveIterator.length > 0) {
    recursiveIterator = nextIteration(recursiveIterator);

    var elapsedTimeInMS = Date.now() - startTime;
    var timeLimitExceeded = elapsedTimeInMS >= MAX_RUNNING_TIME_MS;
    // Only pause when work remains: if the last step also finished the
    // traversal, fall through to the cleanup below instead of saving "[]".
    if (timeLimitExceeded && recursiveIterator.length > 0) {
      properties.setProperty(RECURSIVE_ITERATOR_KEY, JSON.stringify(recursiveIterator));
      console.info("Stopping loop after '%d' milliseconds. Please continue running.", elapsedTimeInMS);
      return;
    }
  }

  console.info("Done running");
  properties.deleteProperty(RECURSIVE_ITERATOR_KEY);
}

/**
 * Performs one step of the traversal on the innermost folder of the stack:
 * processes its next file, or marks its files as done, or descends into its
 * next subfolder, or pops it once its subfolders are exhausted.
 *
 * The stack is modified in place and the same array is returned. An empty
 * stack is returned unchanged.
 *
 * @param {Array<{folderName: string, fileIteratorContinuationToken: ?string, folderIteratorContinuationToken: ?string}>} recursiveIterator
 *     stack of per-folder states, outermost folder first.
 * @returns {Array} the same recursiveIterator array after one step.
 * @throws {Error} if the innermost entry has neither a file nor a folder continuation token.
 */
function nextIteration(recursiveIterator) {
  if (recursiveIterator.length === 0) {
    // Nothing left to do; avoids dereferencing an undefined entry.
    return recursiveIterator;
  }

  // Entries are mutated through this reference, so no write-back into the array is needed.
  var currentIteration = recursiveIterator[recursiveIterator.length - 1];

  // `!= null` also treats a missing (undefined) token as "no token".
  if (currentIteration.fileIteratorContinuationToken != null) {
    var fileIterator = DriveApp.continueFileIterator(currentIteration.fileIteratorContinuationToken);
    if (fileIterator.hasNext()) {
      // process the next file
      var path = recursiveIterator.map(function(iteration) { return iteration.folderName; }).join("/");
      processFile(fileIterator.next(), path);
      currentIteration.fileIteratorContinuationToken = fileIterator.getContinuationToken();
    } else {
      // done processing files
      currentIteration.fileIteratorContinuationToken = null;
    }
    return recursiveIterator;
  }

  if (currentIteration.folderIteratorContinuationToken != null) {
    var folderIterator = DriveApp.continueFolderIterator(currentIteration.folderIteratorContinuationToken);
    if (folderIterator.hasNext()) {
      // process the next folder: remember where we are in the parent, then descend
      var folder = folderIterator.next();
      currentIteration.folderIteratorContinuationToken = folderIterator.getContinuationToken();
      recursiveIterator.push(makeIterationFromFolder(folder));
    } else {
      // done processing subfolders
      recursiveIterator.pop();
    }
    return recursiveIterator;
  }

  // Only reachable when the saved state is malformed; an Error carries a stack trace.
  throw new Error("should never get here: iteration has neither a file nor a folder continuation token");
}

/**
 * Builds the stack entry for a folder: its name plus continuation tokens
 * taken from freshly created file and subfolder iterators.
 *
 * @param {Folder} folder - folder to create an entry for.
 * @returns {{folderName: string, fileIteratorContinuationToken: string, folderIteratorContinuationToken: string}}
 */
function makeIterationFromFolder(folder) {
  var iteration = {};
  iteration.folderName = folder.getName();
  iteration.fileIteratorContinuationToken = folder.getFiles().getContinuationToken();
  iteration.folderIteratorContinuationToken = folder.getFolders().getContinuationToken();
  return iteration;
}

/**
 * Handles a single file found during the traversal. This default
 * implementation logs the file's location as "<path>/<file name>".
 *
 * @param {File} file - the file to process.
 * @param {string} path - slash-separated names of the folders leading to the file.
 */
function processFile(file, path) {
  var fullPath = path + "/" + file.getName();
  console.log(fullPath);
}
like image 71
Senseful Avatar answered Mar 24 '23 14:03

Senseful