Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Google drive duplicate files

I have this code to delete duplicate files but it is not working as desired, It is on a spreadsheet and you go to tools and click on script manager and gives you three buttons

StartProcess

mark Duplicates

deleteDuplicates

The first one retrieves the files, the second one turns duplicate files in light red, and the third one will supposedly erase them and turn them Red, though what it's doing is to turn them just yellow and I'm not able where to find the mistake please somebody help me

function startProcess(){
  PropertiesService.getScriptProperties().deleteAllProperties();
  try{
    ScriptApp.deleteTrigger(ScriptApp.getProjectTriggers()[0]);
  }catch(e){}
  var sh = SpreadsheetApp.getActiveSheet();
  sh.getDataRange().clear();
  sh.getRange(1,1,1,4).setValues([['fileName (logged @'+Utilities.formatDate(new Date(),Session.getScriptTimeZone(),'MMM-dd-yyyy HH:mm')+')','fileSize','parent folders tree','fileID']]);
  var trig = ScriptApp.newTrigger('getDriveFilesList_').timeBased().everyMinutes(5).create();
  Logger.log(trig.getUniqueId()+'  '+trig.getHandlerFunction());
  getDriveFilesList_();
}

function getDriveFilesList_(){
  var content = [];
  var startTime = new Date().getTime();
  var sh = SpreadsheetApp.getActiveSheet();
  if( ! PropertiesService.getScriptProperties().getProperty('numberOfFiles')){
    PropertiesService.getScriptProperties().setProperty('numberOfFiles',0);
  }

  var numberOfFiles = Number(PropertiesService.getScriptProperties().getProperty('numberOfFiles'));
  Logger.log(numberOfFiles);
  var max = numberOfFiles+10000;
  if( ! PropertiesService.getScriptProperties().getProperty('continuationToken')){
    var files = DriveApp.getFiles();
   // var files = DriveApp.getFolderById('0B3qSFd_____MTFZMDQ').getFiles();// use this line and comment the above if you want to process a single folder
   // use your chozen folder ID of course (available from the browser url , the part after "https://drive.google.com/?authuser=0#folders/")
  }else{
    var files = DriveApp.continueFileIterator(PropertiesService.getScriptProperties().getProperty('continuationToken'))
    }
  while(files.hasNext() && numberOfFiles<(max)){
    var file = files.next()
    if(file.getSize()>0){
      numberOfFiles++;
      var folder = '(shared)';
      if(file.getParents().hasNext()){folder = getTree_(file)}
      content.push([file.getName(),file.getSize(),folder,file.getId()])
    }    
    if(new Date().getTime()-startTime > 250000){break};
  }
  sh.getRange(sh.getLastRow()+1,1,content.length,content[0].length).setValues(content);
  if(!files.hasNext()){ScriptApp.deleteTrigger(ScriptApp.getProjectTriggers()[0]);Logger.log('done !'); sh.getRange(sh.getLastRow()+1,1).setValue('All files processed ('+numberOfFiles+' found)')};
  var continuationToken = files.getContinuationToken()
  PropertiesService.getScriptProperties().setProperty('numberOfFiles',numberOfFiles);
  PropertiesService.getScriptProperties().setProperty('continuationToken',continuationToken);
}

function markDuplicates(){
  handleDuplicates_(false)
}

function trashDuplicates(){
  handleDuplicates_(true)
}

function handleDuplicates_(trash){  
  var sh = SpreadsheetApp.getActiveSheet();
  sh.setFrozenRows(1);
  sh.sort(1);
  var data = sh.getDataRange().getValues()
  var headers = data.shift()
  var lastComment = data.pop();
  var toDelete = [];
  var item = data[0];
  for(var n=1 ; n<data.length; n++){
    if(data[n][0]==item[0] && data[n][1]==item[1]){
      toDelete.push('delete '+ n);
    }
    item=data[n];
  }
  var marker =   sh.getRange(2,1,data.length,1).getBackgrounds();
  for(var n in data){
    if(!trash){marker.push(['#FFF'])};
    if(toDelete.indexOf('delete '+n)>-1 && !trash){
      marker[n][0] = '#F99';
    }
    if(toDelete.indexOf('delete '+n)>-1 && trash){
      if(marker[n][0]==='#ff9999'){
        try{
        DriveApp.getFileById(data[n][3]).setTrashed(trash);
        marker[n][0] = '#F33';
        }catch(err){Logger.log(err)}
      }else{
        marker[n][0] = '#FF9';  3
      }
    }
  }
  sh.getRange(2,1,marker.length,1).setBackgrounds(marker);
}

function getTree_(file){
  var tree = [];
  var folderP = file.getParents()
  while (folderP.hasNext()){
    var folder = folderP.next();
    folderP = folder.getParents();
    tree.push(folder.getName());
  }
  return tree.reverse().join('/');
}
like image 644
Jorge Martinez Avatar asked Feb 12 '15 18:02

Jorge Martinez


People also ask

Does Google Drive detect duplicate files?

You probably have a few dozen duplicate files, chiefly MP3 and photos in the Google Drive. This is a handy tool that scans selected directories and finds any duplicate files inside them. The app works with all sorts of files and can also be configured to scan sub-folders within a given directory.

Why is Google Drive duplicating files?

Google Drive duplicate files and folders are caused by the Google Drive sync process. Therefore, if you still cannot stop Google Drive from duplicating files and fix it, it's strongly recommended to try the free cloud backup service - CBackup to backup files to Google Drive without duplicates.

Will Google Drive skip duplicates?

The user can skip copying the duplicate file in Google Drive by clicking on the option 'Cancel'. If the user clicks on the option 'Keep separate', the file will be uploaded with a filename like File (1). The 'Update Existing' option replaces the old file with the new one.


1 Answers

I found a few issues in your code so I've created a new ajrHandleDuplicates_() which seems to do what you are after. I've only run simple tests from a single folder and I've disabled the continuation trigger.

You can run the script from this sheet (and take a copy, although you've got edit access) and you can drop files into this folder to test it (you'll see the results of my last test). I made a little text file on my desktop and dropped that in and renamed it.

like image 156
Andrew Roberts Avatar answered Sep 28 '22 11:09

Andrew Roberts