Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to get all elements in DOM and text without tags

// Collect every element (tag) nested anywhere inside <body>.
// getElementsByTagName('*') returns elements only, so bare text is not included.
var elements = document.body.getElementsByTagName('*');

for(var b = 0; b < elements.length; b++) {
     // elements[b] is a DOM element, i.e. something that has an HTML tag.
     // Wanted here: both the DOM elements with HTML tags and the text that
     // is not wrapped in any HTML tag.
}

How can I do it? Thanks in advance.

jsfiddle: http://jsfiddle.net/Y9B4B/

(vanilla.js)

like image 424
owl Avatar asked Sep 10 '25 21:09

owl


2 Answers

So, it appears from your jsFiddle that what you want is the text that is in the <body>, but not in any other tag. I call that "top-level text". You can collect the top-level text nodes by iterating through the childNodes of the body element and keeping just the text nodes. Any text inside another tag will be a child of that tag, not a direct child of the body.

/**
 * Gathers the text nodes that are direct children of <body>.
 *
 * Text nested inside any other element is skipped, because it belongs to
 * that element's childNodes rather than to the body's.
 *
 * @returns {Array} the matching text nodes, in document order
 */
function getTopTextNodes() {
    var TEXT_NODE = 3; // numeric value of Node.TEXT_NODE

    // childNodes is array-like rather than a real Array, so borrow filter.
    return Array.prototype.filter.call(document.body.childNodes, function (child) {
        return child.nodeType === TEXT_NODE;
    });
}

Or, if you want the text itself (as strings) rather than the text nodes:

/**
 * Returns the text of every text node that is a direct child of <body>.
 *
 * Each text node contributes one entry (its nodeValue); neighbouring text
 * nodes are NOT merged.
 *
 * @returns {Array} one string per top-level text node, in document order
 */
function getTopText() {
    var TEXT_NODE = 3; // numeric value of Node.TEXT_NODE

    // Copy the array-like childNodes into a real Array to chain on it.
    var children = Array.prototype.slice.call(document.body.childNodes);

    return children
        .filter(function (child) {
            return child.nodeType === TEXT_NODE;
        })
        .map(function (textNode) {
            return textNode.nodeValue;
        });
}

Keep in mind that what appears in the document as a single piece of text could be in multiple neighboring text nodes. If you want to combine text from consecutive text nodes, that can be done like this:

/**
 * Returns the top-level text of <body>, with each run of consecutive text
 * nodes joined into a single string.
 *
 * Any non-text child (element, comment, ...) ends the current run, so text
 * on either side of it lands in separate entries.
 *
 * @returns {Array} one string per run of adjacent top-level text nodes
 */
function getTopTextCombined() {
    var TEXT_NODE = 3; // numeric value of Node.TEXT_NODE
    var runs = [];
    var previousWasText = false;

    Array.prototype.forEach.call(document.body.childNodes, function (child) {
        if (child.nodeType !== TEXT_NODE) {
            // A non-text sibling breaks the run.
            previousWasText = false;
            return;
        }

        if (previousWasText) {
            // Continue the run: append to the most recent entry.
            runs[runs.length - 1] += child.nodeValue;
        } else {
            runs.push(child.nodeValue);
        }
        previousWasText = true;
    });

    return runs;
}

Note that different browsers will put things into text nodes slightly differently. In Chrome, you will get some text nodes that contain only whitespace, which you may need to ignore if you just want visible text. There may also be \n characters in the text.

like image 131
jfriend00 Avatar answered Sep 13 '25 10:09

jfriend00


Based upon your fiddle, I think I get what you mean. Try this: http://jsfiddle.net/b5LwP/1/

// Direct children of <body>: elements and bare text nodes alike.
var elements = document.body.childNodes;

// childNodes is array-like, so borrow forEach to visit each child and log
// the text of every one that has any.
Array.prototype.forEach.call(elements, function (child) {
    var content = child.textContent;
    if (content.length) {
        console.log(content);
    }
});

From the markup

<div class="div-class">I have a tag.</div>
I don't have a tag.
<div class="div-class">I have a tag too.</div>

the console will log

I have a tag.
I don't have a tag.
I have a tag too.
like image 44
Charlie Schliesser Avatar answered Sep 13 '25 10:09

Charlie Schliesser