Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How can I speed up my javascript paste operation?

I'm using a NicEdit rich text editor (content editable div based) in my application and users love to paste from word.

So I wanted to strip out any and all junk tags that might get pasted in.

Here is what I'm currently doing.

//build regex to match junk tags
var unwantedtags = [ "font", "span", "table", "tbody", "div", "td", "tr", "input", "a", 
    "body", "button", "form", "head", "img", "select", "textarea", "tfoot", "th", "iframe", "object" ];     
var unwantedregexstring= "";
$.each(unwantedtags, function(index, value) {
    if(unwantedregexstring!= "") {
        unwantedregexstring += "|";
    }
    unwantedregexstring+= "<" + value + ">";
    unwantedregexstring+= "|";
    unwantedregexstring+= "<" + value + "\\s[^>]*>";
    unwantedregexstring+= "|";
    unwantedregexstring+= "</" + value + ">";
});
var unwantedRegex = new RegExp(unwantedregexstring, "igm");

//replace junk tags with nothing
function CleanMSWordPaste(mswordtext) {
    return  mswordtext.replace(unwantedRegex, "");
}

//Function that gets Executed on Paste event
function ExecutePaste(){

    //preserve user's selected text
    var oldRng = document.selection.createRange();

    //create paste area off screen and paste there
    $('body').append("<div id='paster' contenteditable='true' style='height:1px;width:1px;position:fixed;left:-100px;top:-100px;'></div>");
    $('#paster').focus();
    $('#paster')[0].document.execCommand('paste', null, null);

    //if html contains junk tags
    if(unwantedRegex.test($('#paster').html())) {
        //replace html with cleaned html
        $('#paster').html(CleanMSWordPaste($('#paster').html()));

        //select all content of paste area
        var rng = document.body.createTextRange();
        rng.moveToElementText($('#paster')[0]);
        rng.select();

        //copy cleaned html
        $('#paster')[0].document.execCommand('copy', null, null);
    }

    //remove paste area from dom
    $('#paster').remove();

    //restore user's selected text
    oldRng.select();

    //preserves scroll position, focuses NicEditor and performs doc.execCommand('paste')
    //performance of this alone is fine.
    ExecCommand('paste');
}

I'm finding that this is taking quite a long time (ex 1 page of text from word). Is there anything I can do to speed this up? I'm thinking some sort of regex optimization but I don't really have any knowledge of how regexes work in the first place.

like image 937
Biff MaGriff Avatar asked Feb 17 '26 00:02

Biff MaGriff


1 Answers

It seems that your unwantedregexstring will end up looking something like this:

'<font>|<font\s[^>]*>|</font>|<span>|<span\s[^>]*>|</span>|...'

I'm no expert in regexp engine internals, but that looks a bit overly verbose to me. What if you change your algorithm so that unwantedregexstring looks like this instead?

'</?(font|span|...)\s?.*?>'

That will look for a < followed by an optional / followed by one of your specified tags followed by an optional whitespace character followed by zero or more but as few as possible of any character, until the closing > is encountered.

like image 97
Peter Herdenborg Avatar answered Feb 18 '26 15:02

Peter Herdenborg