The getHost() method of URL class returns the hostname of the URL. This method will return the IPv6 address enclosed in square brackets ('['and']').
A URL (aka Universal Resource Locator) is a complete web address used to find a particular web page. While the domain is the name of the website, a URL will lead to any one of the pages within the website.
A neat trick without using regular expressions:
var tmp = document.createElement ('a');
; tmp.href = "http://www.example.com/12xy45";
// tmp.hostname will now contain 'www.example.com'
// tmp.host will now contain hostname and port 'www.example.com:80'
Wrap the above in a function such as the below and you have yourself a superb way of snatching the domain part out of an URI.
function url_domain(data) {
var a = document.createElement('a');
a.href = data;
return a.hostname;
}
I recommend using the npm package psl (Public Suffix List). The "Public Suffix List" is a list of all valid domain suffixes and rules, not just Country Code Top-Level domains, but unicode characters as well that would be considered the root domain (i.e. www.食狮.公司.cn, b.c.kobe.jp, etc.). Read more about it here.
Try:
npm install --save psl
Then with my "extractHostname" implementation run:
let psl = require('psl');
let url = 'http://www.youtube.com/watch?v=ClkQA2Lb_iE';
psl.get(extractHostname(url)); // returns youtube.com
I can't use an npm package, so below only tests extractHostname.
function extractHostname(url) {
var hostname;
//find & remove protocol (http, ftp, etc.) and get hostname
if (url.indexOf("//") > -1) {
hostname = url.split('/')[2];
}
else {
hostname = url.split('/')[0];
}
//find & remove port number
hostname = hostname.split(':')[0];
//find & remove "?"
hostname = hostname.split('?')[0];
return hostname;
}
//test the code
console.log("== Testing extractHostname: ==");
console.log(extractHostname("http://www.blog.classroom.me.uk/index.php"));
console.log(extractHostname("http://www.youtube.com/watch?v=ClkQA2Lb_iE"));
console.log(extractHostname("https://www.youtube.com/watch?v=ClkQA2Lb_iE"));
console.log(extractHostname("www.youtube.com/watch?v=ClkQA2Lb_iE"));
console.log(extractHostname("ftps://ftp.websitename.com/dir/file.txt"));
console.log(extractHostname("websitename.com:1234/dir/file.txt"));
console.log(extractHostname("ftps://websitename.com:1234/dir/file.txt"));
console.log(extractHostname("example.com?param=value"));
console.log(extractHostname("https://facebook.github.io/jest/"));
console.log(extractHostname("//youtube.com/watch?v=ClkQA2Lb_iE"));
console.log(extractHostname("http://localhost:4200/watch?v=ClkQA2Lb_iE"));
// Warning: you can use this function to extract the "root" domain, but it will not be as accurate as using the psl package.
function extractRootDomain(url) {
var domain = extractHostname(url),
splitArr = domain.split('.'),
arrLen = splitArr.length;
//extracting the root domain here
//if there is a subdomain
if (arrLen > 2) {
domain = splitArr[arrLen - 2] + '.' + splitArr[arrLen - 1];
//check to see if it's using a Country Code Top Level Domain (ccTLD) (i.e. ".me.uk")
if (splitArr[arrLen - 2].length == 2 && splitArr[arrLen - 1].length == 2) {
//this is using a ccTLD
domain = splitArr[arrLen - 3] + '.' + domain;
}
}
return domain;
}
//test extractRootDomain
console.log("== Testing extractRootDomain: ==");
console.log(extractRootDomain("http://www.blog.classroom.me.uk/index.php"));
console.log(extractRootDomain("http://www.youtube.com/watch?v=ClkQA2Lb_iE"));
console.log(extractRootDomain("https://www.youtube.com/watch?v=ClkQA2Lb_iE"));
console.log(extractRootDomain("www.youtube.com/watch?v=ClkQA2Lb_iE"));
console.log(extractRootDomain("ftps://ftp.websitename.com/dir/file.txt"));
console.log(extractRootDomain("websitename.co.uk:1234/dir/file.txt"));
console.log(extractRootDomain("ftps://websitename.com:1234/dir/file.txt"));
console.log(extractRootDomain("example.com?param=value"));
console.log(extractRootDomain("https://facebook.github.io/jest/"));
console.log(extractRootDomain("//youtube.com/watch?v=ClkQA2Lb_iE"));
console.log(extractRootDomain("http://localhost:4200/watch?v=ClkQA2Lb_iE"));
Regardless having the protocol or even port number, you can extract the domain. This is a very simplified, non-regex solution, so I think this will do.
*Thank you @Timmerz, @renoirb, @rineez, @BigDong, @ra00l, @ILikeBeansTacos, @CharlesRobertson for your suggestions! @ross-allen, thank you for reporting the bug!
There is no need to parse the string, just pass your URL as an argument to URL
constructor:
const url = 'http://www.youtube.com/watch?v=ClkQA2Lb_iE';
const { hostname } = new URL(url);
console.assert(hostname === 'www.youtube.com');
Try this:
var matches = url.match(/^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i);
var domain = matches && matches[1]; // domain will be null if no match is found
If you want to exclude the port from your result, use this expression instead:
/^https?\:\/\/([^\/:?#]+)(?:[\/:?#]|$)/i
Edit: To prevent specific domains from matching, use a negative lookahead. (?!youtube.com)
/^https?\:\/\/(?!(?:www\.)?(?:youtube\.com|youtu\.be))([^\/:?#]+)(?:[\/:?#]|$)/i
There are two good solutions for this, depending on whether you need to optimize for performance or not (and without external dependencies!):
URL.hostname
for readabilityThe cleanest and easiest solution is to use URL.hostname
.
const getHostname = (url) => {
// use URL constructor and return hostname
return new URL(url).hostname;
}
// tests
console.log(getHostname("https://stackoverflow.com/questions/8498592/extract-hostname-name-from-string/"));
console.log(getHostname("https://developer.mozilla.org/en-US/docs/Web/API/URL/hostname"));
URL.hostname
is part of the URL API, supported by all major browsers except IE (caniuse). Use a URL polyfill if you need to support legacy browsers.
Bonus: using the URL constructor will also give you access to other URL properties and methods!
URL.hostname
should be your choice for most use cases. However, it's still much slower than this regex (test it yourself on jsPerf):
const getHostnameFromRegex = (url) => {
// run against regex
const matches = url.match(/^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i);
// extract hostname (will be null if no match is found)
return matches && matches[1];
}
// tests
console.log(getHostnameFromRegex("https://stackoverflow.com/questions/8498592/extract-hostname-name-from-string/"));
console.log(getHostnameFromRegex("https://developer.mozilla.org/en-US/docs/Web/API/URL/hostname"));
You should probably use URL.hostname
. If you need to process an incredibly large number of URLs (where performance would be a factor), consider RegEx.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With