Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Find all text nodes in HTML page [duplicate]

For this question I needed to find all text nodes under a particular node. I can do this like so:

/**
 * Collects the text nodes found beneath `root`.
 *
 * The direct text children of `root` are gathered first, followed by the
 * direct text children of every element returned by
 * `root.querySelectorAll('*')`, in the order that call yields them. The
 * result is therefore grouped per parent element and is not necessarily in
 * document order.
 *
 * @param {Element} root - Element whose subtree is searched.
 * @returns {Array} The text nodes that were found.
 */
function textNodesUnder(root){
  var found = [];
  var descendants;
  var i;

  collectTextChildren(root);
  descendants = root.querySelectorAll('*');
  for (i = 0; i < descendants.length; i++) {
    collectTextChildren(descendants[i]);
  }
  return found;

  // Appends the direct children of `parent` that are text nodes to `found`.
  function collectTextChildren(parent){
    var kids = parent.childNodes;
    var j;
    for (j = 0; j < kids.length; j++) {
      if (kids[j].nodeType == Node.TEXT_NODE) {
        found.push(kids[j]);
      }
    }
  }
}

However, this seems inelegant in light of the fact that with XPath one could simply query for .//text() and be done with it.

What's the simplest way to get all text nodes under a particular element in an HTML document, that works on IE9+, Safari5+, Chrome19+, Firefox12+, Opera11+?

"Simplest" is defined loosely as "efficient and short, without golfing".

like image 679
Phrogz Avatar asked May 24 '12 02:05

Phrogz


2 Answers

Based on @kennebec's answer, a slightly tighter implementation of the same logic:

/**
 * Returns every text node beneath `node`, in document (depth-first) order.
 *
 * All results are pushed onto one shared array instead of building a new
 * array per recursive call and merging them with `concat`, so each text node
 * is stored exactly once no matter how deeply it is nested. The `node`
 * parameter is no longer reused as the loop cursor.
 *
 * @param {Node} node - Root of the subtree to search; `node` itself is not
 *   included in the result.
 * @returns {Array} The descendant text nodes (nodeType 3) of `node`.
 */
function textNodesUnder(node){
  var all = [];
  collect(node);
  return all;

  // Walks the children of `parent` left to right, recording text nodes and
  // descending into every other node.
  function collect(parent){
    var child;
    for (child = parent.firstChild; child; child = child.nextSibling){
      if (child.nodeType === 3) all.push(child);
      else collect(child);
    }
  }
}

However, a far faster, tighter, and more elegant approach is to use createTreeWalker, so that the browser filters out everything but the text nodes for you:

/**
 * Returns the nodes a text-only TreeWalker rooted at `el` visits.
 *
 * The walker is created with `NodeFilter.SHOW_TEXT`, so the browser does the
 * filtering and only text nodes are handed back. All four arguments of
 * `createTreeWalker` are passed explicitly (no custom filter, and `false`
 * for the legacy fourth flag).
 *
 * @param {Node} el - Root of the subtree to walk.
 * @returns {Array} The nodes returned by successive `nextNode()` calls, in
 *   the order the walker produced them.
 */
function textNodesUnder(el){
  var walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, null, false);
  var found = [];
  var current = walker.nextNode();

  // nextNode() returns a falsy value once the walker is exhausted.
  while (current) {
    found.push(current);
    current = walker.nextNode();
  }
  return found;
}
like image 183
Phrogz Avatar answered Nov 20 '22 09:11

Phrogz


/**
 * Recursively gathers the text nodes (nodeType 3) beneath `node`, in
 * depth-first, left-to-right order.
 *
 * @param {Node} [node] - Root of the subtree to search. A falsy value
 *   (for example `null`) yields an empty array.
 * @returns {Array} The descendant text nodes of `node`.
 */
function deepText(node){
    var A = [];
    var child;

    // Nothing to walk when no node was supplied.
    if (!node) {
        return A;
    }

    for (child = node.firstChild; child != null; child = child.nextSibling) {
        if (child.nodeType == 3) {
            A.push(child);
        } else {
            // Any other node may contain text further down; merge its results.
            A = A.concat(deepText(child));
        }
    }
    return A;
}
like image 6
kennebec Avatar answered Nov 20 '22 09:11

kennebec