Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Regex to transform hashtag in a link without breaking existing HTML code

I want to convert all the URLs in a JavaScript string to links. These strings also contain words that begin with a hashtag (#).

So far I have created two regexes that run in sequence: one that creates HTML anchor tags from URLs, and another that creates anchor tags for the hashtags (like on Twitter).

I am having a lot of problems trying to parse www.sitename.com/index.php#someAnchor into the right markup.

// URLs are linkified first; the hashtag pass then runs on that already-linkified output.
content = urlifyLinks(content);
content = urlifyHashtags(content);

where the two functions are as follows:

/**
 * Turns every "#word" hashtag in `text` into a link to
 * "index.php?keywords=word".
 *
 * A hashtag is "#" followed by one or more ASCII letters or digits. A "#"
 * directly preceded by "&" is left alone so that numeric character
 * references such as "&#123;" are not rewritten.
 *
 * Existing markup is passed through untouched: complete <a>…</a> elements
 * and any other single tag are skipped, so URL fragments inside links that
 * were generated earlier (e.g. href="…/index.php#someAnchor") are not
 * mistaken for hashtags.
 *
 * @param {string} text - Plain text or HTML to process.
 * @returns {string} The text with hashtags wrapped in anchor tags.
 */
function urlifyHashtags(text) {
    // A complete anchor element, or any single opening/closing tag.
    var markupRegex = /(<a\b[^>]*>[\s\S]*?<\/a>|<\/?[a-zA-Z][^>]*>)/gi;
    // One pass handles both "start of text" and "after a non-& character";
    // running two separate passes would re-match the ">#tag" produced by
    // the first pass and nest one anchor inside another.
    var hashtagRegex = /(^|[^&])#([a-zA-Z0-9]+)/g;

    // Splitting on a regex with one capturing group yields an array that
    // alternates text, markup, text, ... so even indices hold the text.
    var pieces = text.split(markupRegex);
    for (var i = 0; i < pieces.length; i += 2) {
        pieces[i] = pieces[i].replace(hashtagRegex, '$1<a href="index.php?keywords=$2">#$2</a>');
    }

    return pieces.join('');
}

/**
 * Wraps URLs, bare "www." addresses and e-mail addresses found in
 * `inputText` in HTML anchor tags. The three replacements run in sequence,
 * each one on the output of the previous one.
 *
 * @param {string} inputText - Text to scan.
 * @returns {string} The text with the recognised addresses turned into links.
 */
function urlifyLinks(inputText) {
    // Declared under the name that is actually assigned below, so no
    // implicit global is created (which would throw in strict mode).
    var replacedText, replacePattern1, replacePattern2, replacePattern3;

    // URLs starting with http://, https:// or ftp://.
    replacePattern1 = /(\b(https?|ftp):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/gim;
    replacedText = inputText.replace(replacePattern1, '<a href="$1" target="_blank">$1</a>');

    // Addresses starting with "www." that are not directly preceded by a
    // slash; the slash check skips the "//www." of URLs handled above.
    replacePattern2 = /(^|[^\/])(www\.[\S]+(\b|$))/gim;
    replacedText = replacedText.replace(replacePattern2, '$1<a href="http://$2" target="_blank">$2</a>');

    // E-mail addresses become mailto: links.
    replacePattern3 = /(\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,6})/gim;
    replacedText = replacedText.replace(replacePattern3, '<a href="mailto:$1">$1</a>');
    return replacedText;
}

I am considering parsing the output of urlifyLinks and applying the hashtag regex only to the first-level DOM nodes that are text nodes. Is that an ugly thing to do?

like image 528
Luke Morgan Avatar asked Dec 21 '22 20:12

Luke Morgan


1 Answer

You can avoid this problem by using a single regex with a callback function replacement.

For example:

/**
 * Converts URLs, "www." addresses, e-mail addresses and "#hashtags" in
 * `str` into HTML links in a single pass.
 *
 * All patterns are joined into one alternation, so each piece of text is
 * consumed by at most one of them: a "#fragment" that is part of a URL is
 * swallowed by the URL pattern and never reaches the hashtag pattern.
 *
 * @param {string} str - Text to linkify.
 * @returns {string} The text with links inserted.
 */
function linkify(str){
    // order matters: earlier alternatives win at the same position
    var re = [
        "\\b((?:https?|ftp)://[^\\s\"'<>]+)\\b",
        "\\b(www\\.[^\\s\"'<>]+)\\b",
        "\\b(\\w[\\w.+-]*@[\\w.-]+\\.[a-z]{2,6})\\b",
        // Numeric character references such as "&#39;" or "&#x27;". This has
        // no capture group on purpose: it matches, sets none of the
        // callback's captures and is returned unchanged, so the hashtag
        // pattern below cannot split it apart.
        "&#x?[0-9a-f]+;",
        "#([a-z0-9]+)"];
    re = new RegExp(re.join('|'), "gi");

    return str.replace(re, function(match, url, www, mail, twitler){
        if(url)
            return "<a href=\"" + url + "\">" + url + "</a>";
        if(www)
            return "<a href=\"http://" + www + "\">" + www + "</a>";
        if(mail)
            return "<a href=\"mailto:" + mail + "\">" + mail + "</a>";
        if(twitler)
            return "<a href=\"foo?bar=" + twitler + "\">#" + twitler + "</a>";

        // no capture set: a character reference, keep it as it was
        return match;
    });
}

Twitler

like image 68
Qtax Avatar answered Dec 23 '22 10:12

Qtax