Following a jQuery ajax call to retrieve an entire XHTML document, what is the best way to select specific elements from the resulting string? Perhaps there is a library or plugin that solves this issue?
jQuery can only select XHTML elements that exist in a string if they're normally allowed in a div in the W3C specification; therefore, I'm curious about selecting things like <title>
, <script>
, and <style>
.
According to the jQuery documentation:
http://docs.jquery.com/Core/jQuery#htmlownerDocument
The HTML string cannot contain elements that are invalid within a div, such as html, head, body, or title elements.
Therefore, since we have established that jQuery does not provide a way to do this, how would I select these elements? As an example, if you can show me how to select the remote page's title, that would be perfect!
Thanks, Pete
Instead of hacking jQuery to do this I'd suggest you drop out of jQuery for a minute and use raw XML dom methods. Using XML Dom methods you would can do this:
window.onload = function(){
$.ajax({
type: 'GET',
url: 'text.html',
dataType: 'html',
success: function(data) {
//cross platform xml object creation from w3schools
try //Internet Explorer
{
xmlDoc=new ActiveXObject("Microsoft.XMLDOM");
xmlDoc.async="false";
xmlDoc.loadXML(data);
}
catch(e)
{
try // Firefox, Mozilla, Opera, etc.
{
parser=new DOMParser();
xmlDoc=parser.parseFromString(data,"text/xml");
}
catch(e)
{
alert(e.message);
return;
}
}
alert(xmlDoc.getElementsByTagName("title")[0].childNodes[0].nodeValue);
}
});
}
No messing about with iframes etc.
Just an idea - tested in FF/Safari - seems to work if you create an iframe to store the document temporarily. Of course, if you are doing this it might be smarter to just use the src property of the iframe to load the document and do whatever you want in the "onload" of it.
$(function() {
$.ajax({
type: 'GET',
url: 'result.html',
dataType: 'html',
success: function(data) {
var $frame = $("<iframe src='about:blank'/>").hide();
$frame.appendTo('body');
var doc = $frame.get(0).contentWindow.document;
doc.write(data);
var $title = $("title", doc);
alert('Title: '+$title.text() );
$frame.remove();
}
});
});
I had to append the iframe to the body to get it to have a .contentWindow.
Inspired from this answer, but with deferred:
function fetchDoc(url) {
var dfd;
dfd = $.Deferred();
$.get(url).done(function (data, textStatus, jqXHR) {
var $iframe = $('<iframe style="display:none;"/>').appendTo('body');
var $doc = $iframe.contents();
var doc = $doc[0];
$iframe.load(function() {
dfd.resolveWith(doc, [data, textStatus, jqXHR]);
return $iframe.remove();
});
doc.open();
doc.write(data);
return doc.close();
}).fail(dfd.reject);
return dfd.promise();
};
And smoke it with:
fetchDoc('/foo.html').done(function (data, textStatus, jqXHR) {
alert($('title', this).text());
});
LIVE DEMO (click 'Run')
How about some quick tag renaming?
$.ajax({
type : "GET",
url : 'results.html',
dataType : "html",
success: function(data) {
data = data.replace(/html/g, "xhtmlx");
data = data.replace(/head/g, "xheadx");
data = data.replace(/title/g, "xtitlex");
data = data.replace(/body/g, "xbodyx");
alert($(data).find("xtitlex").text());
}
});
This works. I just split up the building blocks for better readability.
Check the explanation and the inline comments to grasp the workings of this and why it has to be made like this.
Of course this can't be used to retrieve cross-domain-content for that you either have to proxy the calls through a script of yours or think about integration something like flXHR (Cross-Domain Ajax with Flash)
call.html
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>asd</title>
<script src="jquery.js" type="text/javascript"></script>
<script src="xmlDoc.js" type="text/javascript"></script>
<script src="output.js" type="text/javascript"></script>
<script src="ready.js" type="text/javascript"></script>
</head>
<body>
<div>
<input type="button" id="getit" value="GetIt" />
</div>
</body>
</html>
jquery.js is (jQuery 1.3.2 uncompressed) test.html a valid XHTML-Document
xmlDoc.js
// helper function to create XMLDocument out of a string
jQuery.createXMLDocument = function( s ) {
var xmlDoc;
// is it a IE?
if ( window.ActiveXObject ) {
xmlDoc = new ActiveXObject('Microsoft.XMLDOM');
xmlDoc.async = "false";
// prevent erros as IE tries to resolve the URL in the DOCTYPE
xmlDoc.resolveExternals = false;
xmlDoc.validateOnParse = false;
xmlDoc.loadXML(s);
} else {
// non IE. give me DOMParser
// theoretically this else branch should never be called
// but just in case.
xmlDoc = ( new DOMParser() ).parseFromString( s, "text/xml" );
}
return xmlDoc;
};
output.js
// Output the title of the loaded page
// And get the script-tags and output either the
// src attribute or code
function headerData(data) {
// give me the head element
var x = jQuery("head", data).eq(0);
// output title
alert(jQuery("title", x).eq(0).text());
// for all scripttags which include a file out put src
jQuery("script[src]", x).each(function(index) {
alert((index+1)+" "+jQuery.attr(this, 'src'));
});
// for all scripttags which are inline javascript output code
jQuery("script:not([src])", x).each(function(index) {
alert(this.text);
});
}
ready.js
$(document).ready(function() {
$('#getit').click(function() {
$.ajax({
type : "GET",
url : 'test.html',
dataType : "xml",
// overwrite content-type returned by server to ensure
// the response getst treated as xml
beforeSend: function(xhr) {
// IE doesn't support this so check before using
if (xhr.overrideMimeType) {
xhr.overrideMimeType('text/xml');
}
},
success: function(data) {
headerData(data);
},
error : function(xhr, textStatus, errorThrown) {
// if loading the response as xml failed try it manually
// in theory this should only happen for IE
// maybe some
if (textStatus == 'parsererror') {
var xmlDoc = jQuery.createXMLDocument(xhr.responseText);
headerData(xmlDoc);
} else {
alert("Failed: " + textStatus + " " + errorThrown);
}
}
});
});
});
In Opera the whole thing works without the createXMLDocument
and the beforeSend
function.
The extra trickery is needed for Firefox (3.0.11) and IE6 (can't test IE7, IE8, other browsers) as they have a problem when the Content-Type:
returned by the server doesn't indicate that it's xml. My webserver returned Content-Type: text/html; charset=UTF-8
for test.html.
In those two browsers jQuery called the error
callback with textStatus
saying parsererror
. Because in line 3706 in jQuery.js
data = xml ? xhr.responseXML : xhr.responseText;
data
is being set to null. As in FF and IE the xhr.responseXML
is null. This happens because they don't get that the returned data is xml (as Opera does). And only xhr.responseText
is set with the whole xhtml-code. As data is null the line 3708
if ( xml && data.documentElement.tagName == "parsererror" )
throws an exception which is catched in line 3584 and status is set to parsererror
.
In FF I can solve the problem by using the overrideMimeType()
function before sending the request.
But IE doesn't support that function on the XMLHttpRequest-object so I have to generate the XMLDocument myself if the error-callback is run and the error is parsererror
.
example for test.html
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>Plugins | jQuery Plugins</title>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript">var imagePath = '/content/img/so/';</script>
</head>
<body>
</body>
</html>
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With