I have a document fetched by a $.get call; it's a big, bloated HTML document.
I need to use jQuery to grab an element from it.
I'm trying this (in coffeescript):
# Fetch the page at `url`, then try to read the <title> text from the response.
$.get url, (data) ->
  # The callback body must be indented to belong to the function.
  # NOTE: the asker reports that this lookup does not work.
  title = $(data).find('title').text()
This doesn't work. In browser console I've whittled this down to $(document.documentElement.outerHTML).find('title')
where document.documentElement.outerHTML
gives a string of the document.
I've tried jQuery.parseHTML, with the same result.
The reason why it does not work is because jQuery expects a DOM node to find the 'title' tags. As you noted, you need to parse the html text first.
From here and here, the solution is to parse the string and append it to a temporary div (or other element):
// Parse the HTML string `str` into nodes and append them to a new, temporary <div>,
// so the parsed content can then be queried through `tempDom`.
var tempDom = $('<div></div>').append($.parseHTML(str));
Then, you can manipulate tempDom
to find elements.
Working demo: http://codepen.io/anon/pen/wKwLMP
DOMParser API:
var htmlString = "<html><head><title>Name</title></head><body><div class='content'>Hello</div></body></html>";
// Parse with the HTML parser ("text/html"). "text/xml" would invoke the XML parser,
// which only accepts well-formed XML and produces a parsererror document for
// typical real-world HTML (unclosed void tags, named entities such as &nbsp;, ...).
var htmlDoc = (new DOMParser()).parseFromString(htmlString, "text/html");
Unfortunately, the current answers don't handle a lot of edge cases.
You should not use $.parseHTML(htmlString), as it's immediately lossy. If we check the source code of $.parseHTML, it calls buildFragment, which creates a temporary DOM element and sets its innerHTML property.
Element.innerHTML
provides an API for:
And here's the spec for Html Fragment Parsing Algorithm
Taking a sample string, here's the result of trying various HTML Parsing approaches:
// Sample full-document HTML string that each parsing approach in ParseHtmlTests is run against.
var htmlString = "<html><head><title>Name</title></head><body><div class='content'>Hello</div></body></html>";
/**
 * Runs the sample `htmlString` through five HTML-parsing approaches and, for
 * each one, logs the resulting DOM object, its serialized HTML, and the text
 * looked up for the <title> element and the `.content` element.
 *
 * Relies on these globals: `htmlString`, jQuery (`$`), `document`,
 * `DOMParser`, and `console.LogOutput`.
 */
function ParseHtmlTests() {
  /*** $.parseHTML ***/
  var $parseHtml = $.parseHTML(htmlString);
  console.LogOutput(
    '1. $.parseHTML',
    $parseHtml,
    // $.parseHTML returns a plain array of nodes, so this is Array.prototype.map.
    $parseHtml.map(function(el, i) { return el.outerHTML; }),
    $($parseHtml).find("title").text(),
    $($parseHtml).find(".content").text()
  );

  /*** tempDiv.innerHTML ***/
  var tempDiv = document.createElement("div");
  tempDiv.innerHTML = htmlString;
  console.LogOutput(
    '2. tempDiv.innerHTML',
    tempDiv,
    tempDiv.outerHTML,
    $(tempDiv).find("title").text(),
    $(tempDiv).find(".content").text()
  );

  /*** divAppendContents ***/
  var $divAppendContents = $('<div></div>').append(htmlString);
  console.LogOutput(
    '3. divAppendContents',
    $divAppendContents,
    $divAppendContents.html(),
    $divAppendContents.find("title").text(),
    $divAppendContents.find(".content").text()
  );

  /*** tempHtml.innerHTML ***/
  var tmpHtml = document.createElement('html');
  tmpHtml.innerHTML = htmlString;
  console.LogOutput(
    '4. tempHtml.innerHTML',
    tmpHtml,
    tmpHtml.outerHTML,
    tmpHtml.getElementsByTagName('title')[0].innerText,
    tmpHtml.getElementsByClassName('content')[0].innerText
  );

  /*** DOMParser.parseFromString ***/
  // Use the HTML parser ("text/html"). "text/xml" invokes the XML parser, which
  // only accepts well-formed XML and yields a parsererror document for typical
  // real-world HTML, so the lookups below would fail on such input.
  var htmlDoc = (new DOMParser()).parseFromString(htmlString, "text/html");
  console.LogOutput(
    '5. DOMParser.parseFromString',
    htmlDoc,
    htmlDoc.documentElement.outerHTML,
    htmlDoc.documentElement.getElementsByTagName('title')[0].innerHTML,
    htmlDoc.documentElement.getElementsByClassName('content')[0].innerHTML
  );
}
/*** Create Console Log Methods ***/
// Fallback for environments without console.group: print the group label
// as an ordinary log line. An existing console.group is left in place.
if (!console.group) {
  console.group = function(label) {
    console.log(label);
  };
}
// Fallback for environments without console.groupEnd: print a separator
// line to mark the end of a group. An existing console.groupEnd is kept.
if (!console.groupEnd) {
  console.groupEnd = function(unused) {
    console.log("----------------------------");
  };
}
/**
 * Logs the outcome of one parsing approach as a labelled console group.
 *
 * @param {string} method  Label passed to console.group.
 * @param {*} dom          Value logged after "DOM:".
 * @param {*} html         Value logged after "HTML:".
 * @param {*} title        Value logged after "Title:".
 * @param {*} content      Value logged after "Content:".
 */
console.LogOutput = function(method, dom, html, title, content) {
  // Label/value pairs, in the order they are printed.
  var rows = [
    ["DOM:", dom],
    ["HTML:", html],
    ["Title:", title],
    ["Content:", content]
  ];

  console.group(method);
  rows.forEach(function(row) {
    console.log(row[0], row[1]);
  });
  console.groupEnd();
};
/*** Execute Script ***/
// Called last on purpose: ParseHtmlTests uses console.LogOutput, which is
// created by an assignment earlier in the script and must exist before this runs.
ParseHtmlTests()
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.js"></script>
And here's the output from the above script in chrome:
The best approach seems to be creating an HTML root element by setting the innerHTML of a temporary html element, or using the DOMParser API.
Further Reading:
jQuery.parseHTML()
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With