I'm going to go a bit in-depth with my problem; you can jump to the TL;DR if you don't want to read all of this.
I need to store a "file" (text document) which can be user-edited. If I have my original file (which could be huge)
Lorem ipsum dolor sit amet
and the user were to make a change:
Foo ipsum amet_ sit
Basically, I have the original string and the user-edited string, and I want to find the differences between them (the "edits"). To avoid storing duplicates of very large strings, I want to store only the original and the "edits", and then apply the edits to the original to rebuild the modified version, much like data de-duplication. The problem is that I have no idea how different the edits can be, and I also need to be able to apply those edits to the string.
Because the text could be huge, I am wondering what would be the most "efficient" way to store edits to the text without storing two separate versions. My first guess was something along the lines of:
// Word-by-word diff: one slot per word of the original text, holding the
// replacement word where the two texts differ and undefined where they match.
var str = 'Original String of text...'.split(' ');
var mod = 'Modified String of text...'.split(' ');
var edits = [];
var i = 0;
while (i < str.length) {
  if (str[i] === mod[i]) {
    edits.push(undefined);
  } else {
    edits.push(mod[i]);
  }
  i += 1;
}
console.log(edits); // ["Modified", undefined, undefined, undefined] (desired output)
then to revert back:
// Re-apply the stored edits on top of the original words. A slot counts as
// "unchanged" only when it is undefined, so an edit that is an empty string
// (produced by consecutive spaces in the modified text) is kept instead of
// being discarded the way `edits[i] || str[i]` would discard it.
for (i = 0; i < str.length; i += 1) {
  if (edits[i] !== undefined) {
    str[i] = edits[i];
  }
}
str.join(' '); // "Modified String of text..."
Basically, I'm splitting the text on spaces into arrays, comparing the arrays and storing the differences, and then applying the differences to generate the modified version.
But if the number of spaces were to change, problems would occur:
str
: Original String of text...
mod
: OriginalString of text...
Output: OriginalString of text... text...
My desired output: OriginalString of text...
Even if I were to switch str.length
with mod.length
and edits.length
like:
// Get edits: one slot per word of the modified text; undefined marks a word
// that is identical to the original word at the same index.
var str = 'Original String of text...'.split(' ');
var mod = 'Modified String of text...'.split(' ');
var edits = [];
var i;
for (i = 0; i < mod.length; i += 1) {
  edits.push(str[i] === mod[i] ? undefined : mod[i]);
}
// Apply edits: fall back to the original word only when the slot is
// undefined. Testing with `||` would also throw away an edit that is an empty
// string (which consecutive spaces in the modified text produce).
var final = [];
for (i = 0; i < edits.length; i += 1) {
  final[i] = edits[i] !== undefined ? edits[i] : str[i];
}
final = final.join(' ');
edits
would be: ["ModifiedString", "of", "text..."]
as a result, making the whole 'storing edits' thing useless. It gets even worse if a word is added or removed. If str
were to become Original String of lots of text...
. The output would still be the same.
I can see that there are many flaws in the way I'm doing this, but I can't think of any other way.
Snippet:
// "Second method": diff inputs #a and #b word by word over the length of the
// modified text, rebuild the modified text from the original plus the edits,
// and show the result.
document.getElementById('go').onclick = function() {
  var str = document.getElementById('a').value.split(' ');
  var mod = document.getElementById('b').value.split(' ');
  var edits = [];
  var i;
  for (i = 0; i < mod.length; i += 1) {
    edits.push(str[i] === mod[i] ? undefined : mod[i]);
  }
  // Apply edits. Only an undefined slot means "unchanged"; an empty-string
  // edit (from consecutive spaces) must win over the original word, which
  // `edits[i] || str[i]` got wrong.
  var final = [];
  for (i = 0; i < edits.length; i += 1) {
    final[i] = edits[i] !== undefined ? edits[i] : str[i];
  }
  final = final.join(' ');
  alert(final);
};
// "First method": diff inputs #a and #b word by word over the length of the
// original text, write the edits back into the original word array, and show
// the result.
document.getElementById('go2').onclick = function() {
  var str = document.getElementById('a').value.split(' ');
  var mod = document.getElementById('b').value.split(' ');
  var edits = [];
  var i;
  for (i = 0; i < str.length; i += 1) {
    edits.push(str[i] === mod[i] ? undefined : mod[i]);
  }
  // Only an undefined slot means "unchanged"; an empty-string edit (from
  // consecutive spaces) must replace the original word, which
  // `edits[i] || str[i]` got wrong.
  for (i = 0; i < str.length; i += 1) {
    if (edits[i] !== undefined) {
      str[i] = edits[i];
    }
  }
  alert(str.join(' ')); // "Modified String of text..."
};
Base String:
<input id="a">
<br/>Modified String:
<input id="b" />
<br/>
<button id="go">Second method</button>
<button id="go2">First Method</button>
How would you find the changes between two strings?
I'm dealing with large pieces of text; each could be somewhere between a hundred kilobytes and a megabyte. This is running in the browser.
Edit: Added modified script that can handle more than one text area.
Here is the JSFiddle for a page with more than one editable text area. (Don't forget to open dev tools to see the edits.) You just need to assign each textarea a unique id. Then, create a map using those ids as keys and each textarea's edits array as the values. Here is the updated script:
'use strict';
/**
 * One recorded change to a textarea's text.
 *
 * @constructor
 * @param {string} type - Kind of change; the script stores 'add' or 'delete'.
 * @param {number} position - Character offset the change is recorded at.
 * @param {string} text - The characters that were added or removed.
 */
function Edit(type, position, text) {
  Object.assign(this, { type: type, position: position, text: text });
}
// Tags stored in Edit.type.
var DELETE = 'delete';
var ADD = 'add';

// Recorded edits per textarea: textarea id -> array of Edit, oldest first.
var textAreaEditsMap = {};

// Edit that consecutive key presses are currently extending, or null when the
// next key press has to start a new Edit.
var currentEdit = null;
// Number of characters gathered so far by the current run of delete edits.
var deleteOffset = 1;

// Selection bounds captured from the textarea by the mouseup/keydown
// handlers; -1 until the first event arrives.
var cursorEnd = -1;
var cursorStart = -1;
// Once the page has loaded, give every element with class "text-editable" an
// empty edit list (keyed by its id attribute) and attach the tracking handlers.
window.addEventListener('load', function() {
  var editableAreas = document.getElementsByClassName('text-editable');
  Array.prototype.forEach.call(editableAreas, function(area) {
    textAreaEditsMap[area.getAttribute('id')] = [];
    area.addEventListener('mouseup', handleMouseUp);
    area.addEventListener('keydown', handleKeyDown);
    area.addEventListener('keypress', handleKeyPress);
  });
});
/**
 * mouseup handler for a tracked textarea (called with `this` set to it).
 * A click or drag may have moved the caret, so the run of merged key presses
 * is ended and the textarea's current selection bounds are captured.
 *
 * @param {MouseEvent} event - The mouseup event (not inspected).
 */
function handleMouseUp(event) {
  var area = this;
  currentEdit = null;
  cursorStart = area.selectionStart;
  cursorEnd = area.selectionEnd;
}
/**
 * keydown handler for a tracked textarea (called with `this` set to it).
 *
 * Captures the selection bounds, ends the current run of merged key presses
 * when a caret-movement key is pressed, and records the text that Backspace or
 * Delete is about to remove in this textarea's list in textAreaEditsMap.
 *
 * Every DELETE edit is recorded so that `text` is the slice that started at
 * `position` before the deletion; inserting `text` back at `position` undoes
 * it, whether it came from Backspace, Delete or a selection.
 *
 * @param {KeyboardEvent} event - The keydown event; only `keyCode` is read.
 */
function handleKeyDown(event) {
  var BACKSPACE_KEY = 8;
  var DELETE_KEY = 46;
  // The live text. innerHTML would give the textarea's initial, HTML-escaped
  // markup, which stops matching what the user sees after the first edit.
  var text = this.value;
  var edits = textAreaEditsMap[this.getAttribute('id')];

  cursorStart = this.selectionStart;
  cursorEnd = this.selectionEnd;

  // PageUp (33), PageDown (34), End (35), Home (36) and the arrow keys (37-40)
  // move the caret, so the next change cannot extend the current edit.
  if (event.keyCode >= 33 && event.keyCode <= 40) {
    currentEdit = null;
  }

  if (event.keyCode === BACKSPACE_KEY || event.keyCode === DELETE_KEY) {
    // Extend only a delete edit that is the latest edit of THIS textarea;
    // currentEdit is shared, so focus may have moved here from another one.
    if (currentEdit != null &&
        (currentEdit.type !== DELETE || edits[edits.length - 1] !== currentEdit)) {
      currentEdit = null;
    }

    if (cursorStart !== cursorEnd) { // Deleting highlighted text
      edits.push(new Edit(DELETE, cursorStart, text.substring(cursorStart, cursorEnd)));
      currentEdit = null;
    } else if (event.keyCode === BACKSPACE_KEY) {
      // At offset 0 there is nothing before the caret to remove.
      if (cursorStart > 0) {
        var removedBefore = text.charAt(cursorStart - 1);
        if (currentEdit == null) {
          deleteOffset = 1;
          currentEdit = new Edit(DELETE, cursorStart - 1, removedBefore);
          edits.push(currentEdit);
        } else {
          ++deleteOffset;
          // The removed range grows to the left, so its start moves too.
          currentEdit.position = cursorStart - 1;
          currentEdit.text = removedBefore + currentEdit.text;
        }
      }
    } else if (cursorStart < text.length) { // Delete key with text after the caret
      // keydown fires before the character is removed, so on every press the
      // character about to go is the one directly after the caret.
      var removedAfter = text.charAt(cursorStart);
      if (currentEdit == null) {
        deleteOffset = 1;
        currentEdit = new Edit(DELETE, cursorStart, removedAfter);
        edits.push(currentEdit);
      } else {
        ++deleteOffset;
        currentEdit.text += removedAfter;
      }
    }
  }
  console.log(textAreaEditsMap);
}
/**
 * keypress handler for a tracked textarea (called with `this` set to it).
 *
 * Records the typed character in this textarea's list in textAreaEditsMap.
 * Consecutive characters are merged into one ADD edit positioned where the
 * run started. The position comes from cursorStart/cursorEnd, which the
 * keydown handler captured just before this event.
 *
 * @param {KeyboardEvent} event - The keypress event; only `charCode` is read.
 */
function handleKeyPress(event) {
  // Keys that produce no character report charCode 0; recording them would
  // put a NUL character into the edit text.
  if (!event.charCode) {
    return;
  }

  var edits = textAreaEditsMap[this.getAttribute('id')];
  // Enter reports charCode 13 ("\r"), but a textarea stores the break as "\n".
  var typed = event.charCode === 13 ? '\n' : String.fromCharCode(event.charCode);

  // Extend only an add edit that is the latest edit of THIS textarea;
  // currentEdit is shared, so focus may have moved here from another one.
  if (currentEdit != null &&
      (currentEdit.type !== ADD || edits[edits.length - 1] !== currentEdit)) {
    currentEdit = null;
  }

  // Typing over a selection replaces it: log the removed text first, otherwise
  // the edit list cannot reproduce the resulting document.
  if (cursorStart !== cursorEnd) {
    edits.push(new Edit(DELETE, cursorStart, this.value.substring(cursorStart, cursorEnd)));
    currentEdit = null;
    cursorEnd = cursorStart; // the selection is gone once it has been replaced
  }

  if (currentEdit == null) {
    currentEdit = new Edit(ADD, cursorStart, typed);
    edits.push(currentEdit);
  } else {
    currentEdit.text += typed;
  }
  console.log(textAreaEditsMap);
}
Original post with original script that only handles one textarea follows:
I made an example script that does what you need. I put a working example on JSFiddle. Make sure you press ctrl+shift+J on the JSFiddle example page to open dev tools so you can see the array of edits logged as edits are made. Edits are added to the edits array in chronological order, so you can revert back to the original text by applying the inverse (i.e., add deleted text back; remove added text) in reverse chronological order (i.e., iterate the array backwards). I did not handle copying, pasting, undoing, or redoing from the context menu or through key bindings, but I think that you should be able to use this example as a guide to take care of those things. Here is the script:
'use strict';
/**
 * A single recorded change to the textarea's text.
 *
 * @constructor
 * @param {string} type - Kind of change; the script stores 'add' or 'delete'.
 * @param {number} position - Character offset the change is recorded at.
 * @param {string} text - The characters that were added or removed.
 */
function Edit(type, position, text) {
  Object.assign(this, { type: type, position: position, text: text });
}
// Once the page has loaded, track the edits made to the #saved-text textarea.
// Edits are appended to `edits` in chronological order and logged to the
// console after every key event.
//
// Every DELETE edit is recorded so that `text` is the slice that started at
// `position` before the deletion; inserting `text` back at `position` undoes
// it, whether it came from Backspace, Delete or a selection.
window.addEventListener('load', function() {
  var ADD = 'add';
  var DELETE = 'delete';
  var BACKSPACE_KEY = 8;
  var DELETE_KEY = 46;
  var cursorStart = -1;
  var cursorEnd = -1;
  var edits = [];
  // Edit that consecutive key presses are extending, or null to start anew.
  var currentEdit = null;
  var textarea = document.getElementById('saved-text');

  // A click or drag may move the caret: capture the selection, end the run.
  textarea.addEventListener('mouseup', function(event) {
    cursorStart = this.selectionStart;
    cursorEnd = this.selectionEnd;
    currentEdit = null;
  });

  textarea.addEventListener('keydown', function(event) {
    // The live text. innerHTML would give the textarea's initial,
    // HTML-escaped markup, which goes stale after the first edit.
    var text = textarea.value;

    cursorStart = this.selectionStart;
    cursorEnd = this.selectionEnd;

    // PageUp (33), PageDown (34), End (35), Home (36) and the arrow keys
    // (37-40) move the caret, so the next change starts a new edit.
    if (event.keyCode >= 33 && event.keyCode <= 40) {
      currentEdit = null;
    }

    // deleting text
    if (event.keyCode === BACKSPACE_KEY || event.keyCode === DELETE_KEY) {
      if (currentEdit != null && currentEdit.type !== DELETE) {
        currentEdit = null;
      }

      if (cursorStart !== cursorEnd) { // deleting highlighted text
        edits.push(new Edit(DELETE, cursorStart, text.substring(cursorStart, cursorEnd)));
        currentEdit = null;
      } else if (event.keyCode === BACKSPACE_KEY) {
        // At offset 0 there is nothing before the caret to remove.
        if (cursorStart > 0) {
          var removedBefore = text.charAt(cursorStart - 1);
          if (currentEdit == null) {
            currentEdit = new Edit(DELETE, cursorStart - 1, removedBefore);
            edits.push(currentEdit);
          } else {
            // The removed range grows to the left, so its start moves too.
            currentEdit.position = cursorStart - 1;
            currentEdit.text = removedBefore + currentEdit.text;
          }
        }
      } else if (cursorStart < text.length) { // Delete key with text after the caret
        // keydown fires before the character is removed, so on every press
        // the character about to go is the one directly after the caret.
        var removedAfter = text.charAt(cursorStart);
        if (currentEdit == null) {
          currentEdit = new Edit(DELETE, cursorStart, removedAfter);
          edits.push(currentEdit);
        } else {
          currentEdit.text += removedAfter;
        }
      }
    }
    console.log(edits);
  });

  textarea.addEventListener('keypress', function(event) {
    // Keys that produce no character report charCode 0; recording them would
    // put a NUL character into the edit text.
    if (!event.charCode) {
      return;
    }
    // Enter reports charCode 13 ("\r"), but a textarea stores the break as "\n".
    var typed = event.charCode === 13 ? '\n' : String.fromCharCode(event.charCode);

    if (currentEdit != null && currentEdit.type !== ADD) {
      currentEdit = null;
    }

    // Typing over a selection replaces it: log the removed text first,
    // otherwise the edit list cannot reproduce the resulting document.
    if (cursorStart !== cursorEnd) {
      edits.push(new Edit(DELETE, cursorStart, textarea.value.substring(cursorStart, cursorEnd)));
      currentEdit = null;
      cursorEnd = cursorStart; // the selection is gone once it has been replaced
    }

    // adding text
    if (currentEdit == null) {
      currentEdit = new Edit(ADD, cursorStart, typed);
      edits.push(currentEdit);
    } else {
      currentEdit.text += typed;
    }
    console.log(edits);
  });
});
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With