Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Regex to match urls but not urls in hyperlinks

I am trying to wrap any url that is in some text and turn it into a hyperlink... but I do not want to wrap a url that is already wrapped by a hyperlink.

For example:

<a href="http://twitter.com">Go To Twitter</a>
here is a url http://anotherurl.com

The following code:

/**
 * Wraps every http(s)/ftp/file URL found in `text` in an anchor tag.
 *
 * The pattern has no notion of context, so a URL that already sits inside
 * an attribute value (e.g. href="...") is wrapped as well.
 *
 * @param {string} text - Text (possibly containing HTML) to scan.
 * @returns {string} `text` with each matched URL replaced by
 *   <a href='URL'>URL</a>.
 */
function replaceURLWithHTMLLinks(text) {
  var urlPattern = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/ig;

  // The outer capture group spans the whole match, so the matched
  // substring itself is the URL to emit.
  var linked = text.replace(urlPattern, function (url) {
    return "<a href='" + url + "'>" + url + "</a>";
  });

  return linked;
}

Gives the following output:

<a href="<a href='http://twitter.com'>http://twitter.com</a>">Go To Twitter</a>
here is a url <a href='http://anotherurl.com'>http://anotherurl.com</a>

How can I modify the regex to exclude already hyperlinked urls?

Thanks

Answer:

The new method is:

/**
 * Wraps URLs in `text` in anchor tags, skipping any URL that directly
 * follows a quote character (i.e. one that is already an attribute value
 * such as href="..." or src='...').
 *
 * JavaScript regexes historically lack look-behind, so the character in
 * front of the URL is captured and written back unchanged instead of being
 * consumed and replaced by a space.
 *
 * Limitation: a URL used as the visible text of an existing link
 * (<a href="...">http://...</a>) is not preceded by a quote and is still
 * wrapped.
 *
 * @param {string} text - Text (possibly containing HTML) to scan.
 * @returns {string} `text` with each unquoted URL replaced by
 *   <a href='URL'>URL</a>.
 */
function replaceURLWithHTMLLinks(text) {
  // Group 1: start of input or any single non-quote character (re-emitted).
  // Group 2: the URL. Its last character cannot be punctuation such as
  // '.', ',', ';', ':', '!' or '?', so sentence punctuation stays outside
  // the link, and characters like '<', '>', '"' and "'" end the URL.
  var exp = /(^|[^"'])((?:ftp|https?|file):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/gi;

  return text.replace(exp, function (match, prefix, url) {
    return prefix + "<a href='" + url + "'>" + url + "</a>";
  });
}

The above code functions as required. I modified the regex from a link in the comments because it contained a bug where it would include the full stop; it now excludes any full stops that come after a full url.

like image 736
Base33 Avatar asked Aug 08 '12 11:08

Base33


1 Answers

Since JavaScript doesn't seem to support negative look-behind, you will have to trick it by using a replace function. Capture the href (maybe you should also consider src):

/**
 * Wraps bare URLs in `text` in anchor tags while leaving URLs that are the
 * value of an href/src attribute untouched.
 *
 * The pattern optionally captures a leading `href="` / `src='` style
 * prefix; when that prefix is present the whole match is returned as-is,
 * which stands in for a negative look-behind.
 *
 * @param {string} text - Text (possibly containing HTML) to scan.
 * @returns {string} `text` with each non-attribute URL replaced by
 *   <a href="URL">URL</a>.
 */
function repl(text) {
  var exp = /((href|src)=["']|)(\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/ig;

  return text.replace(exp, function (whole, attrPrefix, attrName, url) {
    // A non-empty prefix means the URL is already an attribute value.
    if (attrPrefix) {
      return whole;
    }
    return "<a href=\"" + url + "\">" + url + "</a>";
  });
}

See the demo

EDIT

A "better" version which will only replace links in actual text nodes:

/**
 * Recursively turns URLs that appear in the text nodes below `node` into
 * <a> elements, modifying the DOM in place.
 *
 * Only text nodes are inspected, so URLs inside attribute values are never
 * touched. Existing <a> elements are skipped entirely so that link text
 * which is itself a URL is not wrapped in a second, nested anchor.
 *
 * Relies on the global `document` to create the new nodes.
 *
 * @param {Node} node - Root of the subtree to process.
 * @returns {undefined} Nothing; the subtree is modified in place.
 */
function repl(node) {
  var exp = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/i;

  // childNodes is a live list and new siblings are inserted while walking,
  // so iterate over a static snapshot. Otherwise the index would land on
  // the anchors just created and skip trailing siblings.
  var children = Array.prototype.slice.call(node.childNodes);

  for (var i = 0; i < children.length; i++) {
    var child = children[i];

    if (child.nodeType === child.TEXT_NODE) {
      var match = exp.exec(child.textContent);
      while (match) {
        var remaining = child.textContent;
        var before = document.createTextNode(remaining.substring(0, match.index));
        var link = document.createElement("a");
        link.href = match[0];
        link.textContent = match[0];

        // Insert through `node` (the actual parent) rather than
        // child.parentElement, which is null under a DocumentFragment.
        node.insertBefore(before, child);
        node.insertBefore(link, child);

        // The original text node keeps only what follows the URL and is
        // searched again for further URLs.
        child.textContent = remaining.substring(match.index + match[0].length);
        match = exp.exec(child.textContent);
      }
    } else if (String(child.nodeName).toLowerCase() !== "a") {
      // Do not descend into existing hyperlinks.
      repl(child);
    }
  }
}

// Linkify the element with id "t" in place. repl(node) has no return
// statement, so `r` is undefined after this call.
var r = repl(document.getElementById("t"));

See the demo

like image 106
Julien Ch. Avatar answered Oct 22 '22 23:10

Julien Ch.