Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to Find duplicates files (by name and/or size) in a specific folder of my google drive?

Is it possible to create a Google apps script which would select automatically duplicate files inside a specific (current) folder (then I just had to delete them with one click) ?

https://developers.google.com/apps-script/reference/drive/

I am asking this because Google drive pc client create random duplicates files when updating them ! They want to solve this bug but I have to manually select them in each folders and then delete them. It is very annoying and time consuming. https://productforums.google.com/forum/#!category-topic/drive/report-a-problem/_ApOopI-Xkw

Edit: Alternatively, as windows cannot create the same filename inside the same folder, find and put in the trash all duplicate filenames inside a specific main folder (and its each of its related subfolders).

like image 647
miodf Avatar asked Mar 29 '14 19:03

miodf


People also ask

Can Google Drive find duplicates?

You probably have a few dozen duplicate files, chiefly MP3 and photos in the Google Drive. This is a handy tool that scans selected directories and finds any duplicate files inside them. The app works with all sorts of files and can also be configured to scan sub-folders within a given directory.


1 Answers

For better readability I add the next part of this answer in a second answer.

Here is the code to handle the duplicates : it works in 2 steps :

  1. detect the duplicates and mark them in the sheet to check if everything is fine (markDuplicates())
  2. remove the red background on the files you want to preserve (they will become yellow after step 3 is done)
  3. delete the selected (marked) files, actually move them to trash, you'll have to empty the trash manually from the drive Ui. (trashDuplicates()) The deleted files will be colored in deep red to confirm deletion.

LAST EDIT :

Placed the full code here and made some functions private to avoid errors due to bad script calls.

function startProcess(){
  PropertiesService.getScriptProperties().deleteAllProperties();
  try{
    ScriptApp.deleteTrigger(ScriptApp.getProjectTriggers()[0]);
  }catch(e){}
  var sh = SpreadsheetApp.getActiveSheet();
  sh.getDataRange().clear();
  sh.getRange(1,1,1,4).setValues([['fileName (logged @'+Utilities.formatDate(new Date(),Session.getScriptTimeZone(),'MMM-dd-yyyy HH:mm')+')','fileSize','parent folders tree','fileID']]);
  var trig = ScriptApp.newTrigger('getDriveFilesList_').timeBased().everyMinutes(5).create();
  Logger.log(trig.getUniqueId()+'  '+trig.getHandlerFunction());
  getDriveFilesList_();
}

function getDriveFilesList_(){
  var content = [];
  var startTime = new Date().getTime();
  var sh = SpreadsheetApp.getActiveSheet();
  if( ! PropertiesService.getScriptProperties().getProperty('numberOfFiles')){
    PropertiesService.getScriptProperties().setProperty('numberOfFiles',0);
  }

  var numberOfFiles = Number(PropertiesService.getScriptProperties().getProperty('numberOfFiles'));
  Logger.log(numberOfFiles);
  var max = numberOfFiles+10000;
  if( ! PropertiesService.getScriptProperties().getProperty('continuationToken')){
    var files = DriveApp.getFiles();
   // var files = DriveApp.getFolderById('0B3qSFd_____MTFZMDQ').getFiles();// use this line and comment the above if you want to process a single folder
   // use your chozen folder ID of course (available from the browser url , the part after "https://drive.google.com/?authuser=0#folders/")
  }else{
    var files = DriveApp.continueFileIterator(PropertiesService.getScriptProperties().getProperty('continuationToken'))
    }
  while(files.hasNext() && numberOfFiles<(max)){
    var file = files.next()
    if(file.getSize()>0){
      numberOfFiles++;
      var folder = '(shared)';
      if(file.getParents().hasNext()){folder = getTree_(file)}
      content.push([file.getName(),file.getSize(),folder,file.getId()])
    }    
    if(new Date().getTime()-startTime > 250000){break};
  }
  sh.getRange(sh.getLastRow()+1,1,content.length,content[0].length).setValues(content);
  if(!files.hasNext()){ScriptApp.deleteTrigger(ScriptApp.getProjectTriggers()[0]);Logger.log('done !'); sh.getRange(sh.getLastRow()+1,1).setValue('All files processed ('+numberOfFiles+' found)')};
  var continuationToken = files.getContinuationToken()
  PropertiesService.getScriptProperties().setProperty('numberOfFiles',numberOfFiles);
  PropertiesService.getScriptProperties().setProperty('continuationToken',continuationToken);
}

function markDuplicates(){
  handleDuplicates_(false)
}

function trashDuplicates(){
  handleDuplicates_(true)
}

function handleDuplicates_(trash){  
  var sh = SpreadsheetApp.getActiveSheet();
  sh.setFrozenRows(1);
  sh.sort(1);
  var data = sh.getDataRange().getValues()
  var headers = data.shift()
  var lastComment = data.pop();
  var toDelete = [];
  var item = data[0];
  for(var n=1 ; n<data.length; n++){
    if(data[n][0]==item[0] && data[n][1]==item[1]){
      toDelete.push('delete '+n);
    }
    item=data[n];
  }
  var marker =   sh.getRange(2,1,data.length,1).getBackgrounds();
  for(var n in data){
    if(!trash){marker.push(['#FFF'])};
    if(toDelete.indexOf('delete '+n)>-1 && !trash){
      marker[n][0] = '#F99';
    }
    if(toDelete.indexOf('delete '+n)>-1 && trash){
      if(marker[n][0]=='#ff9999'){
        try{
        DriveApp.getFileById(data[n][3]).setTrashed(trash);
        marker[n][0] = '#F33';
        }catch(err){Logger.log(err)}
      }else{
        marker[n][0] = '#FF9';
      }
    }
  }
  sh.getRange(2,1,marker.length,1).setBackgrounds(marker);
}

function getTree_(file){
  var tree = [];
  var folderP = file.getParents()
  while (folderP.hasNext()){
    var folder = folderP.next();
    folderP = folder.getParents();
    tree.push(folder.getName());
  }
  return tree.reverse().join('/');
}
like image 103
Serge insas Avatar answered Sep 22 '22 13:09

Serge insas