Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Grabbing Edits from two strings

I'm going to go a bit in-depth with my problem, you can jump to the TL;DR if you don't want to read all of this

What I'm trying to do

I need to store a "file" (text document) which can be user-edited. If I have my original file (which could be huge)

Lorem ipsum dolor sit amet

and the user were to make a change:

Foo ipsum amet_ sit

Basically, I have the original string and the user-edited string. I want to find the differences, "edits". To prevent storing duplicates of very large strings. I want to store the original and the "edits". Then apply the edits to the original. Kind of like data de-duplication. The problem is that I have no idea how different edits can be and I also need to be able to apply those edits to the string.

Attempts

Because the text could be huge, I am wondering what would be the most "efficient" way to store edits to the text without storing two separate versions. My first guess was something along the lines of:

var str = 'Original String of text...'.split(' ') || [],
    mod = 'Modified String of text...'.split(' ') || [], i, edits = [];

for (i = 0; i < str.length; i += 1) {
    edits.push(str[i]===mod[i] ? undefined : mod[i]);
}

console.log(edits); // ["Modified", null, null, null] (desired output)

then to revert back:

for (i = 0; i < str.length; i += 1) {
    str[i] = edits[i] || str[i];
}
str.join(' '); // "Modified String of text..."

Basically, I'm trying to split the text by spaces into arrays. Compare the arrays and store the differences. Then apply the differences to generate the modified version

Problems

But if the amount of spaces were to change, problems would occur:

str: Original String of text... mod: OriginalString of text...

Output: OriginalString of text... text...

My desired output: OriginalString of text...


Even if I were to switch str.length with mod.length and edits.length like:

// Get edits
var str = 'Original String of text...'.split(' ') || [],
    mod = 'Modified String of text...'.split(' ') || [], i, edits = [];

for (i = 0; i < mod.length; i += 1) {
    edits.push(str[i]===mod[i] ? undefined : mod[i]);
}

// Apply edits
var final = [];
for (i = 0; i < edits.length; i += 1) {
    final[i] = edits[i] || str[i];
}
final = final.join(' ');

edits would be: ["ModifiedString", "of", "text..."] in result making the whole 'storing edits thing useless. And even worse if a word were to be added / removed. If str were to become Original String of lots of text.... The output would still be the same.


I can see that they're many flaws in the way I'm doing this, but I can't think of any other way.

Snippet:

document.getElementById('go').onclick = function() {
  var str = document.getElementById('a').value.split(' ') || [],
    mod = document.getElementById('b').value.split(' ') || [],
    i, edits = [];

  for (i = 0; i < mod.length; i += 1) {
    edits.push(str[i] === mod[i] ? undefined : mod[i]);
  }

  // Apply edits
  var final = [];
  for (i = 0; i < edits.length; i += 1) {
    final[i] = edits[i] || str[i];
  }
  final = final.join(' ');
  alert(final);
};

document.getElementById('go2').onclick = function() {
  var str = document.getElementById('a').value.split(' ') || [],
    mod = document.getElementById('b').value.split(' ') || [],
    i, edits = [];

  for (i = 0; i < str.length; i += 1) {
    edits.push(str[i] === mod[i] ? undefined : mod[i]);
  }

  for (i = 0; i < str.length; i += 1) {
    str[i] = edits[i] || str[i];
  }
  alert(str.join(' ')); // "Modified String of text..."
};
Base String:
<input id="a">
<br/>Modified String:
<input id="b" />
<br/>
<button id="go">Second method</button>
<button id="go2">First Method</button>

TL;DR:

How would you find the changes between two strings?


I'm dealing with large pieces of text each could be about a megabyte hundred kilobytes. This is running on the browser

like image 503
Downgoat Avatar asked Jun 07 '15 00:06

Downgoat


1 Answers

Edit: Added modified script that can handle more than one text area.

Here is the JSFiddle for a page with more than one editable text area. (Don't forget to open dev tools to see the edits.) You just need to assign each textarea a unique id. Then, create a map using those id's as keys and each textarea's edits array as the values. Here is the updated script:

'use strict';

function Edit(type, position, text) {
  this.type = type;
  this.position = position;
  this.text = text;
}

var ADD = 'add';
var DELETE = 'delete';

var textAreaEditsMap = {};

var cursorStart = -1;
var cursorEnd = -1;
var currentEdit = null;
var deleteOffset = 1;

window.addEventListener('load', function() {
  var textareas = document.getElementsByClassName('text-editable');

  for (var i = 0; i < textareas.length; ++i) {
    var textarea = textareas.item(i);
    var id = textarea.getAttribute('id');

    textAreaEditsMap[id] = [];
    textarea.addEventListener('mouseup', handleMouseUp);
    textarea.addEventListener('keydown', handleKeyDown);
    textarea.addEventListener('keypress', handleKeyPress);
  }
});

function handleMouseUp(event) {
  cursorStart = this.selectionStart;
  cursorEnd = this.selectionEnd;
  currentEdit = null;
}

function handleKeyDown(event) {

  cursorStart = this.selectionStart;
  cursorEnd = this.selectionEnd;

  if (event.keyCode >= 35 && event.keyCode <= 40) { // detect cursor movement keys
    currentEdit = null;
  }

  // deleting text
  if (event.keyCode === 8 || event.keyCode === 46) {
    if (currentEdit != null && currentEdit.type !== 'delete') {
      currentEdit = null;
    }

    if (cursorStart !== cursorEnd) { // Deleting highlighted text
      var edit = new Edit(DELETE, cursorStart, this.innerHTML.substring(cursorStart, cursorEnd));
      textAreaEditsMap[this.getAttribute('id')].push(edit);
      currentEdit = null;

    } else if (event.keyCode === 8) { // backspace
      if (currentEdit == null) {
        deleteOffset = 1;
        var edit = new Edit(DELETE, cursorStart, this.innerHTML[cursorStart - 1]);
        textAreaEditsMap[this.getAttribute('id')].push(edit);
        currentEdit = edit;
      } else {
        ++deleteOffset;
        currentEdit.text = this.innerHTML[cursorStart - 1] + currentEdit.text;
      }

    } else if (event.keyCode === 46) { // delete
      if (currentEdit == null) {
        deleteOffset = 1;
        var edit = new Edit(DELETE, cursorStart, this.innerHTML[cursorStart]);
        textAreaEditsMap[this.getAttribute('id')].push(edit);
        currentEdit = edit;

      } else {
        currentEdit.text += this.innerHTML[cursorStart + deleteOffset++];
      }
    }
  }

  console.log(textAreaEditsMap)
}

function handleKeyPress(event) {

  if (currentEdit != null && currentEdit.type !== 'add') {
    currentEdit = null;
  }

  if (currentEdit == null) {
    currentEdit = new Edit(ADD, cursorStart, String.fromCharCode(event.charCode));
    textAreaEditsMap[this.getAttribute('id')].push(currentEdit);
  } else {
    currentEdit.text += String.fromCharCode(event.charCode);
  }

  console.log(textAreaEditsMap);
}

Original post with original script that only handles one textarea follows:

I made an example script that does what you need. I put a working example on JSFiddle. Make sure you press ctrl+shift+J on the JSFiddle example page to open dev tools so you can see the array of edits logged as edits are made. Edits are added to the edits array in chronological order, so you can revert back to the original text by applying the inverse (i.e., add deleted text back; remove added text) in reverse chronological order (i.e., iterate the array backwards). I did not handle copying, pasting, undoing, or redoing from the context menu or through key bindings, but I think that you should be able to use this example as a guide to take care of those things. Here is the script:

'use strict';

function Edit(type, position, text) {
  this.type = type;
  this.position = position;
  this.text = text;
}

window.addEventListener('load', function() {
  var ADD = 'add';
  var DELETE = 'delete';

  var cursorStart = -1;
  var cursorEnd = -1;
  var edits = [];
  var currentEdit = null;
  var deleteOffset = 1;

  var textarea = document.getElementById('saved-text');

  textarea.addEventListener('mouseup', function(event) {
    cursorStart = this.selectionStart;
    cursorEnd = this.selectionEnd;
    currentEdit = null;
  });

  textarea.addEventListener('keydown', function(event) {

    cursorStart = this.selectionStart;
    cursorEnd = this.selectionEnd;

    if(event.keyCode >= 35 && event.keyCode <= 40) { // detect cursor movement keys
      currentEdit = null;
    }

    // deleting text
    if(event.keyCode === 8 || event.keyCode === 46) {
      if(currentEdit != null && currentEdit.type !== 'delete') {
        currentEdit = null;
      }

      if(cursorStart !== cursorEnd) {
        var edit = new Edit(DELETE, cursorStart, textarea.innerHTML.substring(cursorStart, cursorEnd));
        edits.push(edit);
        currentEdit = null;

      } else if (event.keyCode === 8) { // backspace
        if (currentEdit == null) {
          deleteOffset = 1;
          var edit = new Edit(DELETE, cursorStart, textarea.innerHTML[cursorStart - 1]);
          edits.push(edit);
          currentEdit = edit;
        } else {
          ++deleteOffset;
          currentEdit.text = textarea.innerHTML[cursorStart - 1] + currentEdit.text;
        }

      } else if (event.keyCode === 46) { // delete
        if(currentEdit == null) {
          deleteOffset = 1;
          var edit = new Edit(DELETE, cursorStart, textarea.innerHTML[cursorStart]);
          edits.push(edit);
          currentEdit = edit;

        } else {
          currentEdit.text += textarea.innerHTML[cursorStart + deleteOffset++];
        }
      }
    }

    console.log(edits)
  });

  textarea.addEventListener('keypress', function(event) {

    if(currentEdit != null && currentEdit.type !== 'add') {
      currentEdit = null;
    }

    // adding text
    if(currentEdit == null) {
      currentEdit = new Edit(ADD, cursorStart, String.fromCharCode(event.charCode));
      edits.push(currentEdit);
    } else {
      currentEdit.text += String.fromCharCode(event.charCode);
    }

    console.log(edits);
  });

});
like image 152
orb Avatar answered Oct 09 '22 19:10

orb